Example #1
class Launcher(LoggingConfigurable):
    """Object for encapsulating launching an image for a user"""

    hub_api_token = Unicode(help="The API token for the Hub")
    hub_url = Unicode(help="The URL of the Hub")
    hub_url_local = Unicode(help="The internal URL of the Hub if different")
    @default('hub_url_local')
    def _default_hub_url_local(self):
        return self.hub_url
    create_user = Bool(True, help="Create a new Hub user")
    allow_named_servers = Bool(
        os.getenv('JUPYTERHUB_ALLOW_NAMED_SERVERS', "false") == "true",
        config=True,
        help="Named user servers are allowed. This is used only when authentication is enabled and "
             "to set unique names for user servers."
    )
    named_server_limit_per_user = Integer(
        int(os.getenv('JUPYTERHUB_NAMED_SERVER_LIMIT_PER_USER', 0)),
        config=True,
        help="""Maximum number of concurrent named servers that can be created by a user."""
    )
    retries = Integer(
        4,
        config=True,
        help="""Number of attempts to make on Hub API requests.

        Adds resiliency to intermittent Hub failures,
        most commonly due to Hub, proxy, or ingress interruptions.
        """
    )
    retry_delay = Integer(
        4,
        config=True,
        help="""
        Time (seconds) to wait between retries for Hub API requests.

        Time is scaled exponentially by the retry attempt (i.e. 4, 8, 16 seconds for the default delay of 4)
        """
    )
    pre_launch_hook = Callable(
        None,
        config=True,
        allow_none=True,
        help="""
        An optional hook function that you can use to implement checks before starting a user's server.
        For example, if you have a non-standard BinderHub deployment,
        this hook can check whether the current user has the right to launch a new repo.
        
        Receives 5 parameters: launcher, image, username, server_name, repo_url
        """
    )

    async def api_request(self, url, *args, **kwargs):
        """Make an API request to JupyterHub"""
        headers = kwargs.setdefault('headers', {})
        headers.update({'Authorization': 'token %s' % self.hub_api_token})
        hub_api_url = os.getenv('JUPYTERHUB_API_URL', '') or self.hub_url_local + 'hub/api/'
        if not hub_api_url.endswith('/'):
            hub_api_url += '/'
        request_url = hub_api_url + url
        req = HTTPRequest(request_url, *args, **kwargs)
        retry_delay = self.retry_delay
        for i in range(1, self.retries + 1):
            try:
                return await AsyncHTTPClient().fetch(req)
            except HTTPError as e:
                # swallow 409 errors on retry only (not first attempt)
                if i > 1 and e.code == 409 and e.response:
                    self.log.warning("Treating 409 conflict on retry as success")
                    return e.response
                # retry requests that fail with error codes of 500 and above
                # because they are likely intermittent issues in the cluster
                # e.g. 502,504 due to ingress issues or Hub relocating,
                # 599 due to connection issues such as Hub restarting
                if e.code >= 500:
                    self.log.error("Error accessing Hub API (using %s): %s", request_url, e)
                    if i == self.retries:
                        # last api request failed, raise the exception
                        raise
                    await gen.sleep(retry_delay)
                    # exponential backoff for consecutive failures
                    retry_delay *= 2
                else:
                    raise

    async def get_user_data(self, username):
        resp = await self.api_request(
            'users/%s' % username,
            method='GET',
        )
        body = json.loads(resp.body.decode('utf-8'))
        return body

    def unique_name_from_repo(self, repo_url):
        """Generate a unique name for a git repo url

        e.g. minrk-binder-example-abc123
        from https://github.com/minrk/binder-example.git
        """
        # start with url path
        if '://' not in repo_url and _ssh_repo_pat.match(repo_url):
            # ssh url
            path = repo_url.split(':', 1)[1]
        else:
            path = urlparse(repo_url).path

        prefix = path.strip('/').replace('/', '-').lower()

        if prefix.endswith('.git'):
            # strip trailing .git
            prefix = prefix[:-4]

        if len(prefix) > 32:
            # if it's long, truncate
            prefix = '{}-{}'.format(prefix[:15], prefix[-15:])

        # add a random suffix to avoid collisions for users on the same image
        return '{}-{}'.format(prefix, ''.join(random.choices(SUFFIX_CHARS, k=SUFFIX_LENGTH)))

    async def launch(self, image, username, server_name='', repo_url='', extra_args=None):
        """Launch a server for a given image

        - creates a temporary user on the Hub if authentication is not enabled
        - spawns a server for temporary/authenticated user
        - generates a token
        - returns a dict containing:
          - `url`: the URL of the server
          - `image`: image spec
          - `repo_url`: the url of the repo
          - `extra_args`: Dictionary of extra arguments passed to the server
          - `token`: the token for the server
        """
        # TODO: validate the image argument?

        # Matches the escaping that JupyterHub does https://github.com/jupyterhub/jupyterhub/blob/c00c3fa28703669b932eb84549654238ff8995dc/jupyterhub/user.py#L427
        escaped_username = quote(username, safe='@~')
        if self.create_user:
            # create a new user
            app_log.info("Creating user %s for image %s", username, image)
            try:
                await self.api_request('users/%s' % escaped_username, body=b'', method='POST')
            except HTTPError as e:
                if e.response:
                    body = e.response.body
                else:
                    body = ''
                app_log.error("Error creating user %s: %s\n%s",
                    username, e, body,
                )
                raise web.HTTPError(500, "Failed to create temporary user for %s" % image)
        elif server_name == '':
            # authentication is enabled but not named servers
            # check if user has a running server ('')
            user_data = await self.get_user_data(escaped_username)
            if server_name in user_data['servers']:
                raise web.HTTPError(409, "User %s already has a running server." % username)
        elif self.named_server_limit_per_user > 0:
            # authentication is enabled with named servers
            # check if user has already reached the limit of named servers
            user_data = await self.get_user_data(escaped_username)
            len_named_spawners = len([s for s in user_data['servers'] if s != ''])
            if self.named_server_limit_per_user <= len_named_spawners:
                raise web.HTTPError(
                    409,
                    "User {} already has the maximum of {} named servers."
                    "  One must be deleted before a new server can be created".format(
                        username, self.named_server_limit_per_user
                    ),
                )

        if self.pre_launch_hook:
            await maybe_future(self.pre_launch_hook(self, image, username, server_name, repo_url))

        # data to be passed into spawner's user_options during launch
        # and also to be returned into 'ready' state
        data = {'image': image,
                'repo_url': repo_url,
                'token': base64.urlsafe_b64encode(uuid.uuid4().bytes).decode('ascii').rstrip('=\n')}
        if extra_args:
            data.update(extra_args)

        # server name to be used in logs
        _server_name = " {}".format(server_name) if server_name else ''

        # start server
        app_log.info("Starting server%s for user %s with image %s", _server_name, username, image)
        try:
            resp = await self.api_request(
                'users/{}/servers/{}'.format(escaped_username, server_name),
                method='POST',
                body=json.dumps(data).encode('utf8'),
            )
            if resp.code == 202:
                # Server hasn't actually started yet
                # We wait for it!
                # NOTE: This ends up being about ten minutes
                for i in range(64):
                    user_data = await self.get_user_data(escaped_username)
                    if user_data['servers'][server_name]['ready']:
                        break
                    if not user_data['servers'][server_name]['pending']:
                        raise web.HTTPError(500, "Image %s for user %s failed to launch" % (image, username))
                    # FIXME: make this configurable
                    # FIXME: Measure how long it takes for servers to start
                    # and tune this appropriately
                    await gen.sleep(min(1.4 ** i, 10))
                else:
                    raise web.HTTPError(500, "Image %s for user %s took too long to launch" % (image, username))

        except HTTPError as e:
            if e.response:
                body = e.response.body
            else:
                body = ''

            app_log.error("Error starting server{} for user {}: {}\n{}".
                          format(_server_name, username, e, body))
            raise web.HTTPError(500, "Failed to launch image %s" % image)

        data['url'] = self.hub_url + 'user/%s/%s' % (escaped_username, server_name)
        return data
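
A minimal usage sketch for the Launcher above (assuming it is importable as binderhub.launcher.Launcher; the Hub URLs, token, username, and image spec below are placeholders, not values from the source):

import asyncio

from binderhub.launcher import Launcher  # assumed import path for the class above


async def main():
    launcher = Launcher(
        hub_url="https://hub.example.org/",          # placeholder public Hub URL (trailing slash expected)
        hub_url_local="http://hub.internal:8081/",   # placeholder internal Hub URL
        hub_api_token="<JUPYTERHUB_API_TOKEN>",      # placeholder admin-scoped API token
        create_user=True,                            # create a temporary Hub user per launch
    )
    # derive a collision-resistant username from the repo URL, then launch a server;
    # launch() returns a dict with 'url', 'image', 'repo_url', 'token' (plus any extra_args)
    username = launcher.unique_name_from_repo("https://github.com/minrk/binder-example.git")
    info = await launcher.launch(
        image="example-registry/binder-example:latest",  # placeholder image spec
        username=username,
        repo_url="https://github.com/minrk/binder-example.git",
    )
    print(info["url"])


asyncio.run(main())
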
Example #2
class Launcher(LoggingConfigurable):
    """Object for encapsulating launching an image for a user"""

    hub_api_token = Unicode(help="The API token for the Hub")
    hub_url = Unicode(help="The URL of the Hub")
    hub_url_local = Unicode(help="The internal URL of the Hub if different")

    @default('hub_url_local')
    def _default_hub_url_local(self):
        return self.hub_url

    create_user = Bool(True, help="Create a new Hub user")
    allow_named_servers = Bool(
        os.getenv('JUPYTERHUB_ALLOW_NAMED_SERVERS', "false") == "true",
        config=True,
        help=
        "Named user servers are allowed. This is used only when authentication is enabled and "
        "to set unique names for user servers.")
    named_server_limit_per_user = Integer(
        int(os.getenv('JUPYTERHUB_NAMED_SERVER_LIMIT_PER_USER', 0)),
        config=True,
        help=
        """Maximum number of concurrent named servers that can be created by a user."""
    )
    retries = Integer(4,
                      config=True,
                      help="""Number of attempts to make on Hub API requests.

        Adds resiliency to intermittent Hub failures,
        most commonly due to Hub, proxy, or ingress interruptions.
        """)
    retry_delay = Integer(4,
                          config=True,
                          help="""
        Time (seconds) to wait between retries for Hub API requests.

        Time is scaled exponentially by the retry attempt (i.e. 4, 8, 16 seconds for the default delay of 4)
        """)
    pre_launch_hook = Callable(None,
                               config=True,
                               allow_none=True,
                               help="""
        An optional hook function that you can use to implement checks before starting a user's server.
        For example, if you have a non-standard BinderHub deployment,
        this hook can check whether the current user has the right to launch a new repo.

        Receives 5 parameters: launcher, image, username, server_name, repo_url
        """)
    launch_timeout = Integer(
        600,
        config=True,
        help="""
        Wait this many seconds until server is ready, raise TimeoutError otherwise.
        """,
    )

    async def api_request(self, url, *args, **kwargs):
        """Make an API request to JupyterHub"""
        headers = kwargs.setdefault('headers', {})
        headers.update({'Authorization': f'token {self.hub_api_token}'})
        hub_api_url = os.getenv('JUPYTERHUB_API_URL',
                                '') or self.hub_url_local + 'hub/api/'
        if not hub_api_url.endswith('/'):
            hub_api_url += '/'
        request_url = hub_api_url + url
        req = HTTPRequest(request_url, *args, **kwargs)
        retry_delay = self.retry_delay
        for i in range(1, self.retries + 1):
            try:
                return await AsyncHTTPClient().fetch(req)
            except HTTPError as e:
                # swallow 409 errors on retry only (not first attempt)
                if i > 1 and e.code == 409 and e.response:
                    self.log.warning(
                        "Treating 409 conflict on retry as success")
                    return e.response
                # retry requests that fail with error codes of 500 and above
                # because they are likely intermittent issues in the cluster
                # e.g. 502,504 due to ingress issues or Hub relocating,
                # 599 due to connection issues such as Hub restarting
                if e.code >= 500:
                    self.log.error("Error accessing Hub API (using %s): %s",
                                   request_url, e)
                    if i == self.retries:
                        # last api request failed, raise the exception
                        raise
                    await gen.sleep(retry_delay)
                    # exponential backoff for consecutive failures
                    retry_delay *= 2
                else:
                    raise

    async def get_user_data(self, username):
        resp = await self.api_request(
            f'users/{username}',
            method='GET',
        )
        body = json.loads(resp.body.decode('utf-8'))
        return body

    def unique_name_from_repo(self, repo_url):
        """Generate a unique name for a git repo url

        e.g. minrk-binder-example-abc123
        from https://github.com/minrk/binder-example.git
        """
        # start with url path
        if '://' not in repo_url and _ssh_repo_pat.match(repo_url):
            # ssh url
            path = repo_url.split(':', 1)[1]
        else:
            path = urlparse(repo_url).path

        prefix = path.strip('/').replace('/', '-').lower()

        if prefix.endswith('.git'):
            # strip trailing .git
            prefix = prefix[:-4]

        if len(prefix) > 32:
            # if it's long, truncate
            prefix = '{}-{}'.format(prefix[:15], prefix[-15:])

        # add a random suffix to avoid collisions for users on the same image
        return '{}-{}'.format(
            prefix, ''.join(random.choices(SUFFIX_CHARS, k=SUFFIX_LENGTH)))

    async def launch(
        self,
        image,
        username,
        server_name="",
        repo_url="",
        extra_args=None,
        event_callback=None,
    ):
        """Launch a server for a given image

        - creates a temporary user on the Hub if authentication is not enabled
        - spawns a server for temporary/authenticated user
        - generates a token
        - returns a dict containing:
          - `url`: the URL of the server
          - `image`: image spec
          - `repo_url`: the url of the repo
          - `extra_args`: Dictionary of extra arguments passed to the server
          - `token`: the token for the server
        """
        # TODO: validate the image argument?

        # Matches the escaping that JupyterHub does https://github.com/jupyterhub/jupyterhub/blob/c00c3fa28703669b932eb84549654238ff8995dc/jupyterhub/user.py#L427
        escaped_username = quote(username, safe='@~')
        if self.create_user:
            # create a new user
            app_log.info("Creating user %s for image %s", username, image)
            try:
                await self.api_request(f'users/{escaped_username}',
                                       body=b'',
                                       method='POST')
            except HTTPError as e:
                if e.response:
                    body = e.response.body
                else:
                    body = ''
                app_log.error(
                    "Error creating user %s: %s\n%s",
                    username,
                    e,
                    body,
                )
                raise web.HTTPError(
                    500, f"Failed to create temporary user for {image}")
        elif server_name == '':
            # authentication is enabled but not named servers
            # check if user has a running server ('')
            user_data = await self.get_user_data(escaped_username)
            if server_name in user_data['servers']:
                raise web.HTTPError(
                    409, f"User {username} already has a running server.")
        elif self.named_server_limit_per_user > 0:
            # authentication is enabled with named servers
            # check if user has already reached the limit of named servers
            user_data = await self.get_user_data(escaped_username)
            len_named_spawners = len(
                [s for s in user_data['servers'] if s != ''])
            if self.named_server_limit_per_user <= len_named_spawners:
                raise web.HTTPError(
                    409,
                    "User {} already has the maximum of {} named servers."
                    "  One must be deleted before a new server can be created".
                    format(username, self.named_server_limit_per_user),
                )

        if self.pre_launch_hook:
            await maybe_future(
                self.pre_launch_hook(self, image, username, server_name,
                                     repo_url))

        # data to be passed into spawner's user_options during launch
        # and also to be returned into 'ready' state
        data = {
            'image': image,
            'repo_url': repo_url,
            'token': base64.urlsafe_b64encode(uuid.uuid4().bytes).decode('ascii').rstrip('=\n'),
        }
        if extra_args:
            data.update(extra_args)

        # server name to be used in logs
        _server_name = " {}".format(server_name) if server_name else ''

        # start server
        app_log.info(
            f"Starting server{_server_name} for user {username} with image {image} extra_args {extra_args}"
        )
        ready_event_future = asyncio.Future()

        def _cancel_ready_event(f=None):
            if not ready_event_future.done():
                if f and f.exception():
                    ready_event_future.set_exception(f.exception())
                else:
                    ready_event_future.cancel()

        try:
            resp = await self.api_request(
                'users/{}/servers/{}'.format(escaped_username, server_name),
                method='POST',
                body=json.dumps(data).encode('utf8'),
            )
            # listen for pending spawn (launch) events until server is ready
            # do this even if previous request finished!
            buffer_list = []

            async def handle_chunk(chunk):
                lines = b"".join(buffer_list + [chunk]).split(b"\n\n")
                # the last item in the list is usually an empty line ('')
                # but it can be the partial line after the last `\n\n`,
                # so put it back on the buffer to handle with the next chunk
                buffer_list[:] = [lines[-1]]
                for line in lines[:-1]:
                    if line:
                        line = line.decode("utf8", "replace")
                    if line and line.startswith("data:"):
                        event = json.loads(line.split(":", 1)[1])
                        if event_callback:
                            await event_callback(event)

                        # stream ends when server is ready or fails
                        if event.get("ready", False):
                            if not ready_event_future.done():
                                ready_event_future.set_result(event)
                        elif event.get("failed", False):
                            if not ready_event_future.done():
                                ready_event_future.set_exception(
                                    web.HTTPError(
                                        500,
                                        event.get("message", "unknown error")))

            url_parts = ["users", username]
            if server_name:
                url_parts.extend(["servers", server_name, "progress"])
            else:
                url_parts.extend(["server/progress"])
            progress_api_url = url_path_join(*url_parts)
            self.log.debug(
                f"Requesting progress for {username}: {progress_api_url}")
            resp_future = self.api_request(
                progress_api_url,
                streaming_callback=lambda chunk: asyncio.ensure_future(
                    handle_chunk(chunk)),
                request_timeout=self.launch_timeout,
            )
            try:
                await gen.with_timeout(timedelta(seconds=self.launch_timeout),
                                       resp_future)
            except (gen.TimeoutError, TimeoutError):
                _cancel_ready_event()
                raise web.HTTPError(
                    500,
                    f"Image {image} for user {username} took too long to launch. Sufficient resources may not currently be available.",
                )

        except HTTPError as e:
            _cancel_ready_event()
            if e.response:
                body = e.response.body
            else:
                body = ''

            app_log.error(
                f"Error starting server{_server_name} for user {username}: {e}\n{body}"
            )
            raise web.HTTPError(500, f"Failed to launch image {image}")
        except Exception:
            _cancel_ready_event()
            raise

        # verify that the server is running!
        try:
            # this should already be done, but it's async so wait a finite time
            ready_event = await gen.with_timeout(timedelta(seconds=5),
                                                 ready_event_future)
        except (gen.TimeoutError, TimeoutError):
            raise web.HTTPError(
                500, f"Image {image} for user {username} failed to launch")

        data["url"] = self.hub_url + f"user/{escaped_username}/{server_name}"
        self.log.debug(data["url"])
        return data
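
Compared with the first example, this version streams the Hub's spawn-progress events and hands each decoded event to event_callback. A minimal sketch of such a callback, again with placeholder connection values:

import asyncio

from binderhub.launcher import Launcher  # assumed import path for the class above


async def log_progress(event):
    # each event is a dict parsed from a "data: {...}" line of the progress stream,
    # e.g. {"progress": 80, "message": "..."} and finally {"ready": True, "url": "..."}
    print(f"[{event.get('progress', '?')}%] {event.get('message', '')}")


async def main():
    launcher = Launcher(
        hub_url="https://hub.example.org/",        # placeholder public Hub URL
        hub_api_token="<JUPYTERHUB_API_TOKEN>",    # placeholder admin-scoped API token
        launch_timeout=600,                        # give up on the launch after ten minutes
    )
    info = await launcher.launch(
        image="example-registry/binder-example:latest",  # placeholder image spec
        username="binder-user-abc123",                   # placeholder username
        event_callback=log_progress,
    )
    print("server ready at", info["url"])


asyncio.run(main())
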
Example #3
class BinderHub(Application):
    """An Application for starting a builder."""
    @default('log_level')
    def _log_level(self):
        return logging.INFO

    aliases = {
        'log-level': 'Application.log_level',
        'f': 'BinderHub.config_file',
        'config': 'BinderHub.config_file',
        'port': 'BinderHub.port',
    }

    flags = {
        'debug': ({
            'BinderHub': {
                'debug': True
            }
        }, "Enable debug HTTP serving & debug logging")
    }

    config_file = Unicode('binderhub_config.py',
                          help="""
        Config file to load.

        If a relative path is provided, it is taken relative to the current directory
        """,
                          config=True)

    google_analytics_code = Unicode(None,
                                    allow_none=True,
                                    help="""
        The Google Analytics code to use on the main page.

        Note that we'll respect Do Not Track settings, despite the fact that GA does not.
        We will not load the GA scripts on browsers with DNT enabled.
        """,
                                    config=True)

    google_analytics_domain = Unicode('auto',
                                      help="""
        The Google Analytics domain to use on the main page.

        By default this is set to 'auto', which sets it up for current domain and all
        subdomains. This can be set to a more restrictive domain here for better privacy
        """,
                                      config=True)

    about_message = Unicode('',
                            help="""
        Additional message to display on the about page.

        Will be directly inserted into the about page's source so you can use
        raw HTML.
        """,
                            config=True)

    banner_message = Unicode('',
                             help="""
        Message to display in a banner on all pages.

        The value will be inserted "as is" into an HTML <div> element
        with grey background, located at the top of the BinderHub pages. Raw
        HTML is supported.
        """,
                             config=True)

    extra_footer_scripts = Dict({},
                                help="""
        Extra bits of JavaScript that should be loaded in footer of each page.

        Only the values are set up as scripts. Keys are used only
        for sorting.

        Omit the <script> tag. This should be primarily used for
        analytics code.
        """,
                                config=True)

    base_url = Unicode('/',
                       help="The base URL of the entire application",
                       config=True)

    @validate('base_url')
    def _valid_base_url(self, proposal):
        if not proposal.value.startswith('/'):
            proposal.value = '/' + proposal.value
        if not proposal.value.endswith('/'):
            proposal.value = proposal.value + '/'
        return proposal.value

    badge_base_url = Union(trait_types=[Unicode(), Callable()],
                           help="""
        Base URL to use when generating launch badges.
        Can also be a function that is passed the current handler and returns
        the badge base URL, or "" for the default.

        For example, you could get the badge_base_url from a custom HTTP
        header, the Referer header, or from a request parameter
        """,
                           config=True)

    @default('badge_base_url')
    def _badge_base_url_default(self):
        return ''

    @validate('badge_base_url')
    def _valid_badge_base_url(self, proposal):
        if callable(proposal.value):
            return proposal.value
        # add a trailing slash only when a value is set
        if proposal.value and not proposal.value.endswith('/'):
            proposal.value = proposal.value + '/'
        return proposal.value

    cors_allow_origin = Unicode("",
                                help="""
        Origins that can access the BinderHub API.

        Sets the Access-Control-Allow-Origin header in the spawned
        notebooks. Set to '*' to allow any origin to access spawned
        notebook servers.

        See also BinderSpawner.cors_allow_origin in the binderhub spawner
        mixin for setting this property on the spawned notebooks.
        """,
                                config=True)

    auth_enabled = Bool(False,
                        help="""If JupyterHub authentication enabled,
        require user to login (don't create temporary users during launch) and
        start the new server for the logged in user.""",
                        config=True)

    port = Integer(8585,
                   help="""
        Port for the builder to listen on.
        """,
                   config=True)

    appendix = Unicode(
        help="""
        Appendix to pass to repo2docker

        A multi-line string of Docker directives to run.
        Since the build context cannot be affected,
        ADD will typically not be useful.

        This should be a Python string template.
        It will be formatted with at least the following names available:

        - binder_url: the shareable URL for the current image
          (e.g. for sharing links to the current Binder)
        - repo_url: the repository URL used to build the image
        """,
        config=True,
    )

    sticky_builds = Bool(
        False,
        help="""
        Attempt to assign builds for the same repository to the same node.

        In order to speed up re-builds of a repository all its builds will
        be assigned to the same node in the cluster.

        Note: This feature only works if you also enable docker-in-docker support.
        """,
        config=True,
    )

    use_registry = Bool(True,
                        help="""
        Set to true to push images to a registry & check for images in registry.

        Set to false to use only local docker images. Useful when running
        in a single node.
        """,
                        config=True)

    build_class = Type(Build,
                       help="""
        The class used to build repo2docker images.

        Must inherit from binderhub.build.Build
        """,
                       config=True)

    registry_class = Type(DockerRegistry,
                          help="""
        The class used to Query a Docker registry.

        Must inherit from binderhub.registry.DockerRegistry
        """,
                          config=True)

    per_repo_quota = Integer(
        0,
        help="""
        Maximum number of concurrent users running from a given repo.

        Limits the amount of Binder that can be consumed by a single repo.

        0 (default) means no quotas.
        """,
        config=True,
    )

    pod_quota = Integer(
        None,
        help="""
        The number of concurrent pods this hub has been designed to support.

        This quota is used as an indication for how much above or below the
        design capacity a hub is running.

        Attempts to launch new pods once the quota has been reached will fail.

        The default corresponds to no quota, 0 means the hub can't accept pods
        (maybe because it is in maintenance mode), and any positive integer
        sets the quota.
        """,
        allow_none=True,
        config=True,
    )

    per_repo_quota_higher = Integer(
        0,
        help="""
        Maximum number of concurrent users running from a higher-quota repo.

        Limits the amount of Binder that can be consumed by a single repo. This
        quota is a second limit for repos with special status. See the
        `high_quota_specs` parameter of RepoProvider classes for usage.

        0 (default) means no quotas.
        """,
        config=True,
    )

    log_tail_lines = Integer(
        100,
        help="""
        Limit number of log lines to show when connecting to an already running build.
        """,
        config=True,
    )

    push_secret = Unicode('binder-build-docker-config',
                          allow_none=True,
                          help="""
        A kubernetes secret object that provides credentials for pushing built images.
        """,
                          config=True)

    image_prefix = Unicode("",
                           help="""
        Prefix for all built docker images.

        If you are pushing to gcr.io, this would start with:
            gcr.io/<your-project-name>/

        Set according to whatever registry you are pushing to.

        Defaults to "", which is probably not what you want :)
        """,
                           config=True)

    build_memory_request = ByteSpecification(
        0,
        help="""
        Amount of memory to request when scheduling a build

        0 reserves no memory.

        This is used as the request for the pod that is spawned to do the building,
        even though the pod itself will not be using that much memory
        since the docker building is happening outside the pod.
        However, it makes kubernetes aware of the resources being used,
        and lets it schedule more intelligently.
        """,
        config=True,
    )
    build_memory_limit = ByteSpecification(
        0,
        help="""
        Max amount of memory allocated for each image build process.

        0 sets no limit.

        This is applied to the docker build itself via repo2docker,
        though it is also applied to our pod that submits the build,
        even though that pod will rarely consume much memory.
        Still, it makes it easier to see the resource limits in place via kubernetes.
        """,
        config=True,
    )

    debug = Bool(False,
                 help="""
        Turn on debugging.
        """,
                 config=True)

    build_docker_host = Unicode("/var/run/docker.sock",
                                config=True,
                                help="""
        The docker URL repo2docker should use to build the images.

        Currently, only paths are supported, and they are expected to be available on
        all the hosts.
        """)

    @validate('build_docker_host')
    def docker_build_host_validate(self, proposal):
        parts = urlparse(proposal.value)
        if parts.scheme != 'unix' or parts.netloc != '':
            raise TraitError(
                "Only unix domain sockets on same node are supported for build_docker_host"
            )
        return proposal.value

    build_docker_config = Dict(None,
                               allow_none=True,
                               help="""
        A dict which will be merged into the .docker/config.json of the build container (repo2docker)
        Here, you could for example pass proxy settings as described here:
        https://docs.docker.com/network/proxy/#configure-the-docker-client

        Note: if you provide your own push_secret, these values won't
        have an effect, as the push_secret will overwrite
        .docker/config.json.
        In this case, make sure that you include your config in your push_secret
        """,
                               config=True)

    hub_api_token = Unicode(
        help="""API token for talking to the JupyterHub API""",
        config=True,
    )

    @default('hub_api_token')
    def _default_hub_token(self):
        return os.environ.get('JUPYTERHUB_API_TOKEN', '')

    hub_url = Unicode(
        help="""
        The base URL of the JupyterHub instance where users will run.

        e.g. https://hub.mybinder.org/
        """,
        config=True,
    )

    hub_url_local = Unicode(
        help="""
        The base URL of the JupyterHub instance for local/internal traffic

        Set this if local/internal network connections from the BinderHub process should
        access JupyterHub using a different URL than public/external traffic.
        Defaults to hub_url.
        """,
        config=True,
    )

    @default('hub_url_local')
    def _default_hub_url_local(self):
        return self.hub_url

    @validate('hub_url', 'hub_url_local')
    def _add_slash(self, proposal):
        """trait validator to ensure hub_url ends with a trailing slash"""
        if proposal.value is not None and not proposal.value.endswith('/'):
            return proposal.value + '/'
        return proposal.value

    build_namespace = Unicode(help="""
        Kubernetes namespace to spawn build pods in.

        Note that the push_secret must refer to a secret in this namespace.
        """,
                              config=True)

    @default('build_namespace')
    def _default_build_namespace(self):
        return os.environ.get('BUILD_NAMESPACE', 'default')

    build_image = Unicode('quay.io/jupyterhub/repo2docker:2021.08.0',
                          help="""
        The repo2docker image to be used for doing builds
        """,
                          config=True)

    build_node_selector = Dict({},
                               config=True,
                               help="""
        Select the node where build pod runs on.
        """)

    repo_providers = Dict(
        {
            'gh': GitHubRepoProvider,
            'gist': GistRepoProvider,
            'git': GitRepoProvider,
            'gl': GitLabRepoProvider,
            'zenodo': ZenodoProvider,
            'figshare': FigshareProvider,
            'hydroshare': HydroshareProvider,
            'dataverse': DataverseProvider,
        },
        config=True,
        help="""
        List of Repo Providers to register and try
        """)

    @validate('repo_providers')
    def _validate_repo_providers(self, proposal):
        """trait validator to ensure there is at least one repo provider"""
        if not proposal.value:
            raise TraitError("Please provide at least one repo provider")

        if any([
                not issubclass(provider, RepoProvider)
                for provider in proposal.value.values()
        ]):
            raise TraitError(
                "Repository providers should inherit from 'binderhub.RepoProvider'"
            )

        return proposal.value

    concurrent_build_limit = Integer(
        32, config=True, help="""The number of concurrent builds to allow.""")
    executor_threads = Integer(
        5,
        config=True,
        help="""The number of threads to use for blocking calls

        Should generally be a small number because we don't
        care about high concurrency here, just not blocking the webserver.
        This executor is not used for long-running tasks (e.g. builds).
        """,
    )
    build_cleanup_interval = Integer(
        60,
        config=True,
        help=
        """Interval (in seconds) for how often stopped build pods will be deleted."""
    )
    build_max_age = Integer(3600 * 4,
                            config=True,
                            help="""Maximum age of builds

        Builds that are still running longer than this
        will be killed.
        """)

    build_token_check_origin = Bool(
        True,
        config=True,
        help="""Whether to validate build token origin.

        False disables the origin check.
        """)

    build_token_expires_seconds = Integer(
        300,
        config=True,
        help="""Expiry (in seconds) of build tokens

        These are generally only used to authenticate a single request
        from a page, so should be short-lived.
        """,
    )

    build_token_secret = Union(
        [Unicode(), Bytes()],
        config=True,
        help="""Secret used to sign build tokens

        Lightweight validation of same-origin requests
        """,
    )

    @validate("build_token_secret")
    def _validate_build_token_secret(self, proposal):
        if isinstance(proposal.value, str):
            # allow hex string for text-only input formats
            return a2b_hex(proposal.value)
        return proposal.value

    @default("build_token_secret")
    def _default_build_token_secret(self):
        if os.environ.get("BINDERHUB_BUILD_TOKEN_SECRET"):
            return a2b_hex(os.environ["BINDERHUB_BUILD_TOKEN_SECRET"])
        app_log.warning(
            "Generating random build token secret."
            " Set BinderHub.build_token_secret to avoid this warning.")
        return secrets.token_bytes(32)

    # FIXME: Come up with a better name for it?
    builder_required = Bool(True,
                            config=True,
                            help="""
        Whether binderhub requires a working build infrastructure to run.

        Build infrastructure is a kubernetes cluster + docker. Setting this to False is useful for pure HTML/CSS/JS local development.
        """)

    ban_networks = Dict(
        config=True,
        help="""
        Dict of networks from which requests should be rejected with 403

        Keys are CIDR notation (e.g. '1.2.3.4/32'),
        values are a label used in log / error messages.
        CIDR strings will be parsed with `ipaddress.ip_network()`.
        """,
    )

    @validate("ban_networks")
    def _cast_ban_networks(self, proposal):
        """Cast CIDR strings to IPv[4|6]Network objects"""
        networks = {}
        for cidr, message in proposal.value.items():
            networks[ipaddress.ip_network(cidr)] = message

        return networks

    ban_networks_min_prefix_len = Integer(
        1,
        help="The shortest prefix in ban_networks",
    )

    @observe("ban_networks")
    def _update_prefix_len(self, change):
        if not change.new:
            min_len = 1
        else:
            min_len = min(net.prefixlen for net in change.new)
        self.ban_networks_min_prefix_len = min_len or 1

    tornado_settings = Dict(config=True,
                            help="""
        Additional settings to pass through to tornado.

        Can include things like additional headers, etc.
        """)

    template_variables = Dict(
        config=True,
        help="Extra variables to supply to jinja templates when rendering.",
    )

    template_path = Unicode(
        help=
        "Path to search for custom jinja templates, before using the default templates.",
        config=True,
    )

    @default('template_path')
    def _template_path_default(self):
        return os.path.join(HERE, 'templates')

    extra_static_path = Unicode(
        help='Path to search for extra static files.',
        config=True,
    )

    extra_static_url_prefix = Unicode(
        '/extra_static/',
        help='Url prefix to serve extra static files.',
        config=True,
    )

    normalized_origin = Unicode(
        '',
        config=True,
        help=
        'Origin to use when emitting events. Defaults to hostname of request when empty'
    )

    allowed_metrics_ips = Set(
        help=
        'List of IPs or networks allowed to GET /metrics. Defaults to all.',
        config=True)

    @staticmethod
    def add_url_prefix(prefix, handlers):
        """add a url prefix to handlers"""
        for i, tup in enumerate(handlers):
            lis = list(tup)
            lis[0] = url_path_join(prefix, tup[0])
            handlers[i] = tuple(lis)
        return handlers

    def init_pycurl(self):
        try:
            AsyncHTTPClient.configure(
                "tornado.curl_httpclient.CurlAsyncHTTPClient")
        except ImportError as e:
            self.log.debug(
                "Could not load pycurl: %s\npycurl is recommended if you have a large number of users.",
                e)
        # set max verbosity of curl_httpclient at INFO
        # because debug-logging from curl_httpclient
        # includes every full request and response
        if self.log_level < logging.INFO:
            curl_log = logging.getLogger('tornado.curl_httpclient')
            curl_log.setLevel(logging.INFO)

    def initialize(self, *args, **kwargs):
        """Load configuration settings."""
        super().initialize(*args, **kwargs)
        self.load_config_file(self.config_file)
        # hook up tornado logging
        if self.debug:
            self.log_level = logging.DEBUG
        tornado.options.options.logging = logging.getLevelName(self.log_level)
        tornado.log.enable_pretty_logging()
        self.log = tornado.log.app_log

        self.init_pycurl()

        # initialize kubernetes config
        if self.builder_required:
            try:
                kubernetes.config.load_incluster_config()
            except kubernetes.config.ConfigException:
                kubernetes.config.load_kube_config()
            self.tornado_settings["kubernetes_client"] = self.kube_client = kubernetes.client.CoreV1Api()

        # times 2 for log + build threads
        self.build_pool = ThreadPoolExecutor(self.concurrent_build_limit * 2)
        # default executor for asyncifying blocking calls (e.g. to kubernetes, docker).
        # this should not be used for long-running requests
        self.executor = ThreadPoolExecutor(self.executor_threads)

        jinja_options = dict(autoescape=True)
        template_paths = [self.template_path]
        base_template_path = self._template_path_default()
        if base_template_path not in template_paths:
            # add base templates to the end, so they are looked up at last after custom templates
            template_paths.append(base_template_path)
        loader = ChoiceLoader([
            # first load base templates with prefix
            PrefixLoader({'templates': FileSystemLoader([base_template_path])},
                         '/'),
            # load all templates
            FileSystemLoader(template_paths)
        ])
        jinja_env = Environment(loader=loader, **jinja_options)
        if self.use_registry:
            registry = self.registry_class(parent=self)
        else:
            registry = None

        self.launcher = Launcher(
            parent=self,
            hub_url=self.hub_url,
            hub_url_local=self.hub_url_local,
            hub_api_token=self.hub_api_token,
            create_user=not self.auth_enabled,
        )

        self.event_log = EventLog(parent=self)

        for schema_file in glob(os.path.join(HERE, 'event-schemas', '*.json')):
            with open(schema_file) as f:
                self.event_log.register_schema(json.load(f))

        self.tornado_settings.update({
            "log_function": log_request,
            "push_secret": self.push_secret,
            "image_prefix": self.image_prefix,
            "debug": self.debug,
            "hub_url": self.hub_url,
            "launcher": self.launcher,
            "appendix": self.appendix,
            "ban_networks": self.ban_networks,
            "ban_networks_min_prefix_len": self.ban_networks_min_prefix_len,
            "build_namespace": self.build_namespace,
            "build_image": self.build_image,
            "build_node_selector": self.build_node_selector,
            "build_pool": self.build_pool,
            "build_token_check_origin": self.build_token_check_origin,
            "build_token_secret": self.build_token_secret,
            "build_token_expires_seconds": self.build_token_expires_seconds,
            "sticky_builds": self.sticky_builds,
            "log_tail_lines": self.log_tail_lines,
            "pod_quota": self.pod_quota,
            "per_repo_quota": self.per_repo_quota,
            "per_repo_quota_higher": self.per_repo_quota_higher,
            "repo_providers": self.repo_providers,
            "rate_limiter": RateLimiter(parent=self),
            "use_registry": self.use_registry,
            "build_class": self.build_class,
            "registry": registry,
            "traitlets_config": self.config,
            "google_analytics_code": self.google_analytics_code,
            "google_analytics_domain": self.google_analytics_domain,
            "about_message": self.about_message,
            "banner_message": self.banner_message,
            "extra_footer_scripts": self.extra_footer_scripts,
            "jinja2_env": jinja_env,
            "build_memory_limit": self.build_memory_limit,
            "build_memory_request": self.build_memory_request,
            "build_docker_host": self.build_docker_host,
            "build_docker_config": self.build_docker_config,
            "base_url": self.base_url,
            "badge_base_url": self.badge_base_url,
            "static_path": os.path.join(HERE, "static"),
            "static_url_prefix": url_path_join(self.base_url, "static/"),
            "template_variables": self.template_variables,
            "executor": self.executor,
            "auth_enabled": self.auth_enabled,
            "event_log": self.event_log,
            "normalized_origin": self.normalized_origin,
            "allowed_metrics_ips": set(map(ipaddress.ip_network, self.allowed_metrics_ips)),
        })
        if self.auth_enabled:
            self.tornado_settings['cookie_secret'] = os.urandom(32)
        if self.cors_allow_origin:
            self.tornado_settings.setdefault(
                'headers',
                {})['Access-Control-Allow-Origin'] = self.cors_allow_origin

        handlers = [
            (r'/metrics', MetricsHandler),
            (r'/versions', VersionHandler),
            (r"/build/([^/]+)/(.+)", BuildHandler),
            (r"/v2/([^/]+)/(.+)", ParameterizedMainHandler),
            (r"/repo/([^/]+)/([^/]+)(/.*)?", LegacyRedirectHandler),
            (r'/~([^/]+/.*)', UserRedirectHandler),
            # for backward-compatible mybinder.org badge URLs
            # /assets/images/badge.svg
            (r'/assets/(images/badge\.svg)', tornado.web.StaticFileHandler, {
                'path': self.tornado_settings['static_path']
            }),
            # /badge.svg
            (r'/(badge\.svg)', tornado.web.StaticFileHandler, {
                'path':
                os.path.join(self.tornado_settings['static_path'], 'images')
            }),
            # /badge_logo.svg
            (r'/(badge\_logo\.svg)', tornado.web.StaticFileHandler, {
                'path':
                os.path.join(self.tornado_settings['static_path'], 'images')
            }),
            # /logo_social.png
            (r'/(logo\_social\.png)', tornado.web.StaticFileHandler, {
                'path':
                os.path.join(self.tornado_settings['static_path'], 'images')
            }),
            # /favicon_XXX.ico
            (r'/(favicon\_fail\.ico)', tornado.web.StaticFileHandler, {
                'path':
                os.path.join(self.tornado_settings['static_path'], 'images')
            }),
            (r'/(favicon\_success\.ico)', tornado.web.StaticFileHandler, {
                'path':
                os.path.join(self.tornado_settings['static_path'], 'images')
            }),
            (r'/(favicon\_building\.ico)', tornado.web.StaticFileHandler, {
                'path':
                os.path.join(self.tornado_settings['static_path'], 'images')
            }),
            (r'/about', AboutHandler),
            (r'/health', HealthHandler, {
                'hub_url': self.hub_url_local
            }),
            (r'/_config', ConfigHandler),
            (r'/', MainHandler),
            (r'.*', Custom404),
        ]
        handlers = self.add_url_prefix(self.base_url, handlers)
        if self.extra_static_path:
            handlers.insert(
                -1,
                (
                    re.escape(url_path_join(self.base_url, self.extra_static_url_prefix)) + r"(.*)",
                    tornado.web.StaticFileHandler,
                    {'path': self.extra_static_path},
                ),
            )
        if self.auth_enabled:
            oauth_redirect_uri = os.getenv('JUPYTERHUB_OAUTH_CALLBACK_URL') or \
                                 url_path_join(self.base_url, 'oauth_callback')
            oauth_redirect_uri = urlparse(oauth_redirect_uri).path
            handlers.insert(
                -1, (re.escape(oauth_redirect_uri), HubOAuthCallbackHandler))
        self.tornado_app = tornado.web.Application(handlers,
                                                   **self.tornado_settings)

    def stop(self):
        self.http_server.stop()
        self.build_pool.shutdown()

    async def watch_build_pods(self):
        """Watch build pods

        Every build_cleanup_interval:
        - delete stopped build pods
        - delete running build pods older than build_max_age
        """
        while True:
            try:
                await asyncio.wrap_future(
                    self.executor.submit(lambda: Build.cleanup_builds(
                        self.kube_client,
                        self.build_namespace,
                        self.build_max_age,
                    )))
            except Exception:
                app_log.exception("Failed to cleanup build pods")
            await asyncio.sleep(self.build_cleanup_interval)

    def start(self, run_loop=True):
        self.log.info("BinderHub starting on port %i", self.port)
        self.http_server = HTTPServer(
            self.tornado_app,
            xheaders=True,
        )
        self.http_server.listen(self.port)
        if self.builder_required:
            asyncio.ensure_future(self.watch_build_pods())
        if run_loop:
            tornado.ioloop.IOLoop.current().start()
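
Every trait above declared with config=True can be set from the config_file (binderhub_config.py by default), which initialize() loads via load_config_file(). A minimal sketch of such a file; every value below is a placeholder rather than a recommendation:

# binderhub_config.py -- a minimal sketch; all values are placeholders
c.BinderHub.hub_url = "https://hub.example.org/"           # public JupyterHub URL
c.BinderHub.hub_api_token = "<JUPYTERHUB_API_TOKEN>"       # token for the Hub API
c.BinderHub.use_registry = True
c.BinderHub.image_prefix = "gcr.io/my-project/binder-"     # registry prefix for built images
c.BinderHub.per_repo_quota = 100                           # max concurrent users per repo
c.BinderHub.build_memory_limit = "2G"                      # cap for each repo2docker build
c.BinderHub.banner_message = "Welcome to this BinderHub!"  # raw HTML is allowed

The application would then be started with something like python -m binderhub -f binderhub_config.py, which runs initialize() followed by start().
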
Example #4
class EventLog(Configurable):
    """
    Send structured events to a logging sink
    """
    handlers_maker = Callable(None,
                              config=True,
                              allow_none=True,
                              help="""
        Callable that returns a list of logging.Handler instances to send events to.

        When set to None (the default), events are discarded.
        """)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.log = logging.getLogger(__name__)
        # We don't want events to show up in the default logs
        self.log.propagate = False
        self.log.setLevel(logging.INFO)

        if self.handlers_maker:
            self.handlers = self.handlers_maker(self)
            formatter = jsonlogger.JsonFormatter(json_serializer=_skip_message)
            for handler in self.handlers:
                handler.setFormatter(formatter)
                self.log.addHandler(handler)

        self.schemas = {}

    def register_schema(self, schema):
        """
        Register a given JSON Schema with this event emitter

        'version' and '$id' are required fields.
        """
        # Check if our schema itself is valid
        # This throws an exception if it isn't valid
        jsonschema.validators.validator_for(schema).check_schema(schema)

        # Check that the properties we require are present
        required_schema_fields = {'$id', 'version'}
        for rsf in required_schema_fields:
            if rsf not in schema:
                raise ValueError(f'{rsf} is required in schema specification')

        # Make sure reserved, auto-added fields are not in schema
        reserved_fields = {'timestamp', 'schema', 'version'}
        for rf in reserved_fields:
            if rf in schema['properties']:
                raise ValueError(
                    f'{rf} field is reserved by event emitter & can not be explicitly set in schema'
                )

        self.schemas[(schema['$id'], schema['version'])] = schema

    def emit(self, schema_name, version, event):
        """
        Emit event with given schema / version in a capsule.
        """
        if not self.handlers_maker:
            # If we don't have a handler setup, ignore everything
            return

        if (schema_name, version) not in self.schemas:
            raise ValueError(
                f'Schema {schema_name} version {version} not registered')
        schema = self.schemas[(schema_name, version)]
        jsonschema.validate(event, schema)

        capsule = {
            'timestamp': datetime.utcnow().isoformat() + 'Z',
            'schema': schema_name,
            'version': version
        }
        capsule.update(event)
        self.log.info(capsule)
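
A minimal sketch of wiring the EventLog above to a file sink and emitting one event; the schema, sink path, and event fields are illustrative only:

import logging


def make_handlers(event_log):
    # handlers_maker must return a list of logging.Handler instances;
    # here events are appended as JSON lines to a local file (placeholder path)
    return [logging.FileHandler("binder-events.jsonl")]


event_log = EventLog(handlers_maker=make_handlers)

# an illustrative schema: '$id' and 'version' are required, and the reserved
# fields 'timestamp', 'schema', and 'version' may not appear in its properties
event_log.register_schema({
    "$id": "example.org/launch",
    "version": 1,
    "type": "object",
    "properties": {
        "provider": {"type": "string"},
        "spec": {"type": "string"},
    },
})

# the emitted capsule gains 'timestamp', 'schema', and 'version' automatically
event_log.emit("example.org/launch", 1, {
    "provider": "GitHub",
    "spec": "minrk/binder-example/HEAD",
})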