Example #1
class NotebookApp(BaseIPythonApplication):

    name = 'ipython-notebook'

    description = """
        The IPython HTML Notebook.
        
        This launches a Tornado based HTML Notebook Server that serves up an
        HTML5/Javascript Notebook client.
    """
    examples = _examples

    classes = IPythonConsoleApp.classes + [
        MappingKernelManager, NotebookManager, FileNotebookManager,
        NotebookNotary
    ]
    flags = Dict(flags)
    aliases = Dict(aliases)

    subcommands = dict(list=(NbserverListApp,
                             NbserverListApp.description.splitlines()[0]), )

    kernel_argv = List(Unicode)

    def _log_level_default(self):
        return logging.INFO

    def _log_format_default(self):
        """override default log format to include time"""
        return u"%(asctime)s.%(msecs).03d [%(name)s]%(highlevel)s %(message)s"

    # create requested profiles by default, if they don't exist:
    auto_create = Bool(True)

    # file to be opened in the notebook server
    file_to_run = Unicode('', config=True)

    def _file_to_run_changed(self, name, old, new):
        path, base = os.path.split(new)
        if path:
            self.file_to_run = base
            self.notebook_dir = path

    # Network related information

    allow_origin = Unicode('',
                           config=True,
                           help="""Set the Access-Control-Allow-Origin header

        Use '*' to allow any origin to access your server.

        Takes precedence over allow_origin_pat.
        """)

    allow_origin_pat = Unicode(
        '',
        config=True,
        help=
        """Use a regular expression for the Access-Control-Allow-Origin header

        Requests from an origin matching the expression will get replies with:

            Access-Control-Allow-Origin: origin

        where `origin` is the origin of the request.

        Ignored if allow_origin is set.
        """)

    allow_credentials = Bool(
        False,
        config=True,
        help="Set the Access-Control-Allow-Credentials: true header")

    ip = Unicode('localhost',
                 config=True,
                 help="The IP address the notebook server will listen on.")

    def _ip_changed(self, name, old, new):
        if new == u'*': self.ip = u''

    port = Integer(8888,
                   config=True,
                   help="The port the notebook server will listen on.")
    port_retries = Integer(
        50,
        config=True,
        help=
        "The number of additional ports to try if the specified port is not available."
    )

    certfile = Unicode(
        u'',
        config=True,
        help="""The full path to an SSL/TLS certificate file.""")

    keyfile = Unicode(
        u'',
        config=True,
        help="""The full path to a private key file for usage with SSL/TLS.""")

    cookie_secret = Bytes(b'',
                          config=True,
                          help="""The random bytes used to secure cookies.
        By default this is a new random number every time you start the Notebook.
        Set it to a value in a config file to enable logins to persist across server sessions.
        
        Note: Cookie secrets should be kept private; do not share config files with
        cookie_secret stored in plaintext (you can read the value from a file).
        """)

    def _cookie_secret_default(self):
        return os.urandom(1024)

    password = Unicode(u'',
                       config=True,
                       help="""Hashed password to use for web authentication.

                      To generate, type in a python/IPython shell:

                        from IPython.lib import passwd; passwd()

                      The string should be of the form type:salt:hashed-password.
                      """)

    open_browser = Bool(True,
                        config=True,
                        help="""Whether to open in a browser after starting.
                        The specific browser used is platform dependent and
                        determined by the python standard library `webbrowser`
                        module, unless it is overridden using the --browser
                        (NotebookApp.browser) configuration option.
                        """)

    browser = Unicode(u'',
                      config=True,
                      help="""Specify what command to use to invoke a web
                      browser when opening the notebook. If not specified, the
                      default browser will be determined by the `webbrowser`
                      standard library module, which allows setting of the
                      BROWSER environment variable to override it.
                      """)

    webapp_settings = Dict(
        config=True,
        help="Supply overrides for the tornado.web.Application that the "
        "IPython notebook uses.")

    jinja_environment_options = Dict(
        config=True,
        help="Supply extra arguments that will be passed to Jinja environment."
    )

    enable_mathjax = Bool(
        True,
        config=True,
        help="""Whether to enable MathJax for typesetting math/TeX

        MathJax is the javascript library IPython uses to render math/LaTeX. It is
        very large, so you may want to disable it if you have a slow internet
        connection, or for offline use of the notebook.

        When disabled, equations etc. will appear as their untransformed TeX source.
        """)

    def _enable_mathjax_changed(self, name, old, new):
        """set mathjax url to empty if mathjax is disabled"""
        if not new:
            self.mathjax_url = u''

    base_url = Unicode('/',
                       config=True,
                       help='''The base URL for the notebook server.

                               Leading and trailing slashes can be omitted,
                               and will automatically be added.
                               ''')

    def _base_url_changed(self, name, old, new):
        if not new.startswith('/'):
            self.base_url = '/' + new
        elif not new.endswith('/'):
            self.base_url = new + '/'
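
    # Editorial note (not in the original source): assigning a bare name here
    # normalizes recursively -- setting base_url to 'custom' fires this
    # handler, which assigns '/custom'; that assignment fires it again and
    # appends the trailing slash, yielding '/custom/'.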

    base_project_url = Unicode('/',
                               config=True,
                               help="""DEPRECATED use base_url""")

    def _base_project_url_changed(self, name, old, new):
        self.log.warn("base_project_url is deprecated, use base_url")
        self.base_url = new

    extra_static_paths = List(
        Unicode,
        config=True,
        help="""Extra paths to search for serving static files.
        
        This allows adding javascript/css to be available from the notebook server machine,
        or overriding individual files shipped with IPython.""")

    def _extra_static_paths_default(self):
        return [os.path.join(self.profile_dir.location, 'static')]

    @property
    def static_file_path(self):
        """return extra paths + the default location"""
        return self.extra_static_paths + [DEFAULT_STATIC_FILES_PATH]

    nbextensions_path = List(
        Unicode,
        config=True,
        help="""Paths for JavaScript extensions. By default, this is just
        IPYTHONDIR/nbextensions.""")

    def _nbextensions_path_default(self):
        return [os.path.join(get_ipython_dir(), 'nbextensions')]

    mathjax_url = Unicode("", config=True, help="""The url for MathJax.js.""")

    def _mathjax_url_default(self):
        if not self.enable_mathjax:
            return u''
        static_url_prefix = self.webapp_settings.get(
            "static_url_prefix", url_path_join(self.base_url, "static"))

        # try local mathjax, either in nbextensions/mathjax or static/mathjax
        for (url_prefix, search_path) in [
            (url_path_join(self.base_url,
                           "nbextensions"), self.nbextensions_path),
            (static_url_prefix, self.static_file_path),
        ]:
            self.log.debug("searching for local mathjax in %s", search_path)
            try:
                mathjax = filefind(os.path.join('mathjax', 'MathJax.js'),
                                   search_path)
            except IOError:
                continue
            else:
                url = url_path_join(url_prefix, u"mathjax/MathJax.js")
                self.log.info("Serving local MathJax from %s at %s", mathjax,
                              url)
                return url

        if os.path.exists("/usr/share/javascript/mathjax/MathJax.js"):
            self.log.info("Using system MathJax")
            return u"/mathjax/MathJax.js"

        # no local mathjax, serve from CDN
        url = u"https://cdn.mathjax.org/mathjax/latest/MathJax.js"
        self.log.info("Using MathJax from CDN: %s", url)
        return url

    def _mathjax_url_changed(self, name, old, new):
        if new and not self.enable_mathjax:
            # enable_mathjax=False overrides mathjax_url
            self.mathjax_url = u''
        else:
            self.log.info("Using MathJax: %s", new)

    notebook_manager_class = DottedObjectName(
        'IPython.html.services.notebooks.filenbmanager.FileNotebookManager',
        config=True,
        help='The notebook manager class to use.')

    trust_xheaders = Bool(
        False,
        config=True,
        help=("Whether to trust X-Scheme/X-Forwarded-Proto and "
              "X-Real-Ip/X-Forwarded-For headers sent by the upstream "
              "reverse proxy. Necessary if the proxy handles SSL."))

    info_file = Unicode()

    def _info_file_default(self):
        info_file = "nbserver-%s.json" % os.getpid()
        return os.path.join(self.profile_dir.security_dir, info_file)

    notebook_dir = Unicode(
        py3compat.getcwd(),
        config=True,
        help="The directory to use for notebooks and kernels.")

    def _notebook_dir_changed(self, name, old, new):
        """Do a bit of validation of the notebook dir."""
        if not os.path.isabs(new):
            # If we receive a non-absolute path, make it absolute.
            self.notebook_dir = os.path.abspath(new)
            return
        if not os.path.isdir(new):
            raise TraitError("No such notebook dir: %r" % new)

        # setting App.notebook_dir implies setting notebook and kernel dirs as well
        self.config.FileNotebookManager.notebook_dir = new
        self.config.MappingKernelManager.root_dir = new

    def parse_command_line(self, argv=None):
        super(NotebookApp, self).parse_command_line(argv)

        if self.extra_args:
            arg0 = self.extra_args[0]
            f = os.path.abspath(arg0)
            self.argv.remove(arg0)
            if not os.path.exists(f):
                self.log.critical("No such file or directory: %s", f)
                self.exit(1)

            # Use config here, to ensure that it takes higher priority than
            # anything that comes from the profile.
            c = Config()
            if os.path.isdir(f):
                c.NotebookApp.notebook_dir = f
            elif os.path.isfile(f):
                c.NotebookApp.file_to_run = f
            self.update_config(c)

    def init_kernel_argv(self):
        """construct the kernel arguments"""
        # Scrub frontend-specific flags
        self.kernel_argv = swallow_argv(self.argv, notebook_aliases,
                                        notebook_flags)
        if any(arg.startswith(u'--pylab') for arg in self.kernel_argv):
            self.log.warn('\n    '.join([
                "Starting all kernels in pylab mode is not recommended,",
                "and will be disabled in a future release.",
                "Please use the %matplotlib magic to enable matplotlib instead.",
                "pylab implies many imports, which can have confusing side effects",
                "and harm the reproducibility of your notebooks.",
            ]))
        # Kernel should inherit default config file from frontend
        self.kernel_argv.append("--IPKernelApp.parent_appname='%s'" %
                                self.name)
        # Kernel should get *absolute* path to profile directory
        self.kernel_argv.extend(["--profile-dir", self.profile_dir.location])

    def init_configurables(self):
        # force Session default to be secure
        default_secure(self.config)
        self.kernel_manager = MappingKernelManager(
            parent=self,
            log=self.log,
            kernel_argv=self.kernel_argv,
            connection_dir=self.profile_dir.security_dir,
        )
        kls = import_item(self.notebook_manager_class)
        self.notebook_manager = kls(parent=self, log=self.log)
        self.session_manager = SessionManager(parent=self, log=self.log)
        self.cluster_manager = ClusterManager(parent=self, log=self.log)
        self.cluster_manager.update_profiles()

    def init_logging(self):
        # This prevents double log messages because tornado uses a root logger
        # that self.log is a child of. The logging module dispatches log
        # messages to a logger and all of its ancestors unless propagate is
        # set to False.
        self.log.propagate = False

        # hook up tornado 3's loggers to our app handlers
        for name in ('access', 'application', 'general'):
            logger = logging.getLogger('tornado.%s' % name)
            logger.parent = self.log
            logger.setLevel(self.log.level)

    def init_webapp(self):
        """initialize tornado webapp and httpserver"""
        self.webapp_settings['allow_origin'] = self.allow_origin
        if self.allow_origin_pat:
            self.webapp_settings['allow_origin_pat'] = re.compile(
                self.allow_origin_pat)
        self.webapp_settings['allow_credentials'] = self.allow_credentials

        self.web_app = NotebookWebApplication(
            self, self.kernel_manager, self.notebook_manager,
            self.cluster_manager, self.session_manager, self.log,
            self.base_url, self.webapp_settings,
            self.jinja_environment_options)
        if self.certfile:
            ssl_options = dict(certfile=self.certfile)
            # Disable SSLv3, since its use is discouraged.
            ssl_options['ssl_version'] = ssl.PROTOCOL_TLSv1
            if self.keyfile:
                ssl_options['keyfile'] = self.keyfile
        else:
            ssl_options = None

        self.web_app.password = self.password
        self.http_server = httpserver.HTTPServer(self.web_app,
                                                 ssl_options=ssl_options,
                                                 xheaders=self.trust_xheaders)
        if not self.ip:
            warning = "WARNING: The notebook server is listening on all IP addresses"
            if ssl_options is None:
                self.log.critical(warning + " and not using encryption. This "
                                  "is not recommended.")
            if not self.password:
                self.log.critical(
                    warning + " and not using authentication. "
                    "This is highly insecure and not recommended.")
        success = None
        for port in random_ports(self.port, self.port_retries + 1):
            try:
                self.http_server.listen(port, self.ip)
            except socket.error as e:
                if e.errno == errno.EADDRINUSE:
                    self.log.info(
                        'The port %i is already in use, trying another random port.'
                        % port)
                    continue
                elif e.errno in (errno.EACCES,
                                 getattr(errno, 'WSAEACCES', errno.EACCES)):
                    self.log.warn("Permission to listen on port %i denied" %
                                  port)
                    continue
                else:
                    raise
            else:
                self.port = port
                success = True
                break
        if not success:
            self.log.critical(
                'ERROR: the notebook server could not be started because '
                'no available port could be found.')
            self.exit(1)

    @property
    def display_url(self):
        ip = self.ip if self.ip else '[all ip addresses on your system]'
        return self._url(ip)

    @property
    def connection_url(self):
        ip = self.ip if self.ip else 'localhost'
        return self._url(ip)

    def _url(self, ip):
        proto = 'https' if self.certfile else 'http'
        return "%s://%s:%i%s" % (proto, ip, self.port, self.base_url)

    def init_signal(self):
        if not sys.platform.startswith('win'):
            signal.signal(signal.SIGINT, self._handle_sigint)
        signal.signal(signal.SIGTERM, self._signal_stop)
        if hasattr(signal, 'SIGUSR1'):
            # Windows doesn't support SIGUSR1
            signal.signal(signal.SIGUSR1, self._signal_info)
        if hasattr(signal, 'SIGINFO'):
            # only on BSD-based systems
            signal.signal(signal.SIGINFO, self._signal_info)

    def _handle_sigint(self, sig, frame):
        """SIGINT handler spawns confirmation dialog"""
        # register more forceful signal handler for ^C^C case
        signal.signal(signal.SIGINT, self._signal_stop)
        # request confirmation dialog in bg thread, to avoid
        # blocking the App
        thread = threading.Thread(target=self._confirm_exit)
        thread.daemon = True
        thread.start()

    def _restore_sigint_handler(self):
        """callback for restoring original SIGINT handler"""
        signal.signal(signal.SIGINT, self._handle_sigint)

    def _confirm_exit(self):
        """confirm shutdown on ^C
        
        A second ^C, or answering 'y' within 5s will cause shutdown,
        otherwise original SIGINT handler will be restored.
        
        This doesn't work on Windows.
        """
        # FIXME: remove this delay when pyzmq dependency is >= 2.1.11
        time.sleep(0.1)
        info = self.log.info
        info('interrupted')
        print(self.notebook_info())
        sys.stdout.write("Shutdown this notebook server (y/[n])? ")
        sys.stdout.flush()
        r, w, x = select.select([sys.stdin], [], [], 5)
        if r:
            line = sys.stdin.readline()
            if line.lower().startswith('y') and 'n' not in line.lower():
                self.log.critical("Shutdown confirmed")
                ioloop.IOLoop.instance().stop()
                return
        else:
            print("No answer for 5s:", end=' ')
        print("resuming operation...")
        # no answer, or answer is no:
        # set it back to original SIGINT handler
        # use IOLoop.add_callback because signal.signal must be called
        # from main thread
        ioloop.IOLoop.instance().add_callback(self._restore_sigint_handler)

    def _signal_stop(self, sig, frame):
        self.log.critical("received signal %s, stopping", sig)
        ioloop.IOLoop.instance().stop()

    def _signal_info(self, sig, frame):
        print(self.notebook_info())

    def init_components(self):
        pass

    @catch_config_error
    def initialize(self, argv=None):
        super(NotebookApp, self).initialize(argv)
        self.init_logging()
        self.init_kernel_argv()
        self.init_configurables()
        self.init_components()
        self.init_webapp()
        self.init_signal()

    def cleanup_kernels(self):
        """Shutdown all kernels.
        
        The kernels will shutdown themselves when this process no longer exists,
        but explicit shutdown allows the KernelManagers to cleanup the connection files.
        """
        self.log.info('Shutting down kernels')
        self.kernel_manager.shutdown_all()

    def notebook_info(self):
        "Return the current working directory and the server url information"
        info = self.notebook_manager.info_string() + "\n"
        info += "%d active kernels \n" % len(self.kernel_manager._kernels)
        return info + "The IPython Notebook is running at: %s" % self.display_url

    def server_info(self):
        """Return a JSONable dict of information about this server."""
        return {
            'url': self.connection_url,
            'hostname': self.ip if self.ip else 'localhost',
            'port': self.port,
            'secure': bool(self.certfile),
            'base_url': self.base_url,
            'notebook_dir': os.path.abspath(self.notebook_dir),
        }

    def write_server_info_file(self):
        """Write the result of server_info() to the JSON file info_file."""
        with open(self.info_file, 'w') as f:
            json.dump(self.server_info(), f, indent=2)

    def remove_server_info_file(self):
        """Remove the nbserver-<pid>.json file created for this server.
        
        Ignores the error raised when the file has already been removed.
        """
        try:
            os.unlink(self.info_file)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

    def start(self):
        """ Start the IPython Notebook server app, after initialization
        
        This method takes no arguments so all configuration and initialization
        must be done prior to calling this method."""
        if self.subapp is not None:
            return self.subapp.start()

        info = self.log.info
        for line in self.notebook_info().split("\n"):
            info(line)
        info(
            "Use Control-C to stop this server and shut down all kernels (twice to skip confirmation)."
        )

        self.write_server_info_file()

        if self.open_browser or self.file_to_run:
            try:
                browser = webbrowser.get(self.browser or None)
            except webbrowser.Error as e:
                self.log.warn('No web browser found: %s.' % e)
                browser = None

            if self.file_to_run:
                fullpath = os.path.join(self.notebook_dir, self.file_to_run)
                if not os.path.exists(fullpath):
                    self.log.critical("%s does not exist" % fullpath)
                    self.exit(1)

                uri = url_path_join('notebooks', self.file_to_run)
            else:
                uri = 'tree'
            if browser:
                b = lambda: browser.open(
                    url_path_join(self.connection_url, uri), new=2)
                threading.Thread(target=b).start()
        try:
            ioloop.IOLoop.instance().start()
        except KeyboardInterrupt:
            info("Interrupted...")
        finally:
            self.cleanup_kernels()
            self.remove_server_info_file()
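
# Editorial usage sketch (not part of the original source): how the
# NotebookApp defined above is normally launched.  `launch_instance` is the
# standard IPython/traitlets Application entry point; the module path matches
# the IPython.html era of this code, and the flags are illustrative.
from IPython.html.notebookapp import NotebookApp

if __name__ == '__main__':
    # Equivalent to running `ipython notebook --port=9999 --no-browser`.
    NotebookApp.launch_instance(argv=['--port=9999', '--no-browser'])
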
Example #2
class NotebookApp(BaseIPythonApplication):

    name = 'ipython-notebook'
    default_config_file_name = 'ipython_notebook_config.py'

    description = """
        The IPython HTML Notebook.
        
        This launches a Tornado based HTML Notebook Server that serves up an
        HTML5/Javascript Notebook client.
    """
    examples = _examples

    classes = IPythonConsoleApp.classes + [
        MappingKernelManager, NotebookManager, FileNotebookManager
    ]
    flags = Dict(flags)
    aliases = Dict(aliases)

    kernel_argv = List(Unicode)

    log_level = Enum(
        (0, 10, 20, 30, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL'),
        default_value=logging.INFO,
        config=True,
        help="Set the log level by value or name.")

    # create requested profiles by default, if they don't exist:
    auto_create = Bool(True)

    # file to be opened in the notebook server
    file_to_run = Unicode('')

    # Network related information.

    ip = Unicode(LOCALHOST,
                 config=True,
                 help="The IP address the notebook server will listen on.")

    def _ip_changed(self, name, old, new):
        if new == u'*': self.ip = u''

    port = Integer(8888,
                   config=True,
                   help="The port the notebook server will listen on.")
    port_retries = Integer(
        50,
        config=True,
        help=
        "The number of additional ports to try if the specified port is not available."
    )

    certfile = Unicode(
        u'',
        config=True,
        help="""The full path to an SSL/TLS certificate file.""")

    keyfile = Unicode(
        u'',
        config=True,
        help="""The full path to a private key file for usage with SSL/TLS.""")

    password = Unicode(u'',
                       config=True,
                       help="""Hashed password to use for web authentication.

                      To generate, type in a python/IPython shell:

                        from IPython.lib import passwd; passwd()

                      The string should be of the form type:salt:hashed-password.
                      """)

    open_browser = Bool(True,
                        config=True,
                        help="""Whether to open in a browser after starting.
                        The specific browser used is platform dependent and
                        determined by the python standard library `webbrowser`
                        module, unless it is overridden using the --browser
                        (NotebookApp.browser) configuration option.
                        """)

    browser = Unicode(u'',
                      config=True,
                      help="""Specify what command to use to invoke a web
                      browser when opening the notebook. If not specified, the
                      default browser will be determined by the `webbrowser`
                      standard library module, which allows setting of the
                      BROWSER environment variable to override it.
                      """)

    read_only = Bool(False,
                     config=True,
                     help="Whether to prevent editing/execution of notebooks.")

    use_less = Bool(False,
                    config=True,
                    help="""Wether to use Browser Side less-css parsing
                       instead of compiled css version in templates that allows
                       it. This is mainly convenient when working on the less
                       file to avoid a build step, or if user want to overwrite
                       some of the less variables without having to recompile
                       everything.
                       
                       You will need to install the less.js component in the static directory
                       either in the source tree or in your profile folder.
                       """)

    webapp_settings = Dict(
        config=True,
        help="Supply overrides for the tornado.web.Application that the "
        "IPython notebook uses.")

    enable_mathjax = Bool(
        True,
        config=True,
        help="""Whether to enable MathJax for typesetting math/TeX

        MathJax is the javascript library IPython uses to render math/LaTeX. It is
        very large, so you may want to disable it if you have a slow internet
        connection, or for offline use of the notebook.

        When disabled, equations etc. will appear as their untransformed TeX source.
        """)

    def _enable_mathjax_changed(self, name, old, new):
        """set mathjax url to empty if mathjax is disabled"""
        if not new:
            self.mathjax_url = u''

    base_project_url = Unicode('/',
                               config=True,
                               help='''The base URL for the notebook server.

                               Leading and trailing slashes can be omitted,
                               and will automatically be added.
                               ''')

    def _base_project_url_changed(self, name, old, new):
        if not new.startswith('/'):
            self.base_project_url = '/' + new
        elif not new.endswith('/'):
            self.base_project_url = new + '/'

    base_kernel_url = Unicode('/',
                              config=True,
                              help='''The base URL for the kernel server

                               Leading and trailing slashes can be omitted,
                               and will automatically be added.
                               ''')

    def _base_kernel_url_changed(self, name, old, new):
        if not new.startswith('/'):
            self.base_kernel_url = '/' + new
        elif not new.endswith('/'):
            self.base_kernel_url = new + '/'

    websocket_host = Unicode("",
                             config=True,
                             help="""The hostname for the websocket server.""")

    extra_static_paths = List(
        Unicode,
        config=True,
        help="""Extra paths to search for serving static files.
        
        This allows adding javascript/css to be available from the notebook server machine,
        or overriding individual files shipped with IPython.""")

    def _extra_static_paths_default(self):
        return [os.path.join(self.profile_dir.location, 'static')]

    @property
    def static_file_path(self):
        """return extra paths + the default location"""
        return self.extra_static_paths + [
            os.path.join(os.path.dirname(__file__), "static")
        ]

    mathjax_url = Unicode("", config=True, help="""The url for MathJax.js.""")

    def _mathjax_url_default(self):
        if not self.enable_mathjax:
            return u''
        static_url_prefix = self.webapp_settings.get("static_url_prefix",
                                                     "/static/")
        try:
            mathjax = filefind(os.path.join('mathjax', 'MathJax.js'),
                               self.static_file_path)
        except IOError:
            if self.certfile:
                # HTTPS: load from Rackspace CDN, because SSL certificate requires it
                base = u"https://c328740.ssl.cf1.rackcdn.com"
            else:
                base = u"http://cdn.mathjax.org"

            url = base + u"/mathjax/latest/MathJax.js"
            self.log.info("Using MathJax from CDN: %s", url)
            return url
        else:
            self.log.info("Using local MathJax from %s" % mathjax)
            return static_url_prefix + u"mathjax/MathJax.js"

    def _mathjax_url_changed(self, name, old, new):
        if new and not self.enable_mathjax:
            # enable_mathjax=False overrides mathjax_url
            self.mathjax_url = u''
        else:
            self.log.info("Using MathJax: %s", new)

    notebook_manager_class = DottedObjectName(
        'IPython.frontend.html.notebook.filenbmanager.FileNotebookManager',
        config=True,
        help='The notebook manager class to use.')

    def parse_command_line(self, argv=None):
        super(NotebookApp, self).parse_command_line(argv)
        if argv is None:
            argv = sys.argv[1:]

        # Scrub frontend-specific flags
        self.kernel_argv = swallow_argv(argv, notebook_aliases, notebook_flags)
        # Kernel should inherit default config file from frontend
        self.kernel_argv.append("--KernelApp.parent_appname='%s'" % self.name)

        if self.extra_args:
            f = os.path.abspath(self.extra_args[0])
            if os.path.isdir(f):
                nbdir = f
            else:
                self.file_to_run = f
                nbdir = os.path.dirname(f)
            self.config.NotebookManager.notebook_dir = nbdir

    def init_configurables(self):
        # force Session default to be secure
        default_secure(self.config)
        self.kernel_manager = MappingKernelManager(
            config=self.config,
            log=self.log,
            kernel_argv=self.kernel_argv,
            connection_dir=self.profile_dir.security_dir,
        )
        kls = import_item(self.notebook_manager_class)
        self.notebook_manager = kls(config=self.config, log=self.log)
        self.notebook_manager.log_info()
        self.notebook_manager.load_notebook_names()
        self.cluster_manager = ClusterManager(config=self.config, log=self.log)
        self.cluster_manager.update_profiles()

    def init_logging(self):
        # This prevents double log messages because tornado uses a root logger
        # that self.log is a child of. The logging module dispatches log
        # messages to a logger and all of its ancestors unless propagate is
        # set to False.
        self.log.propagate = False

    def init_webapp(self):
        """initialize tornado webapp and httpserver"""
        self.web_app = NotebookWebApplication(self, self.kernel_manager,
                                              self.notebook_manager,
                                              self.cluster_manager, self.log,
                                              self.base_project_url,
                                              self.webapp_settings)
        if self.certfile:
            ssl_options = dict(certfile=self.certfile)
            if self.keyfile:
                ssl_options['keyfile'] = self.keyfile
        else:
            ssl_options = None
        self.web_app.password = self.password
        self.http_server = httpserver.HTTPServer(self.web_app,
                                                 ssl_options=ssl_options)
        if not self.ip:
            warning = "WARNING: The notebook server is listening on all IP addresses"
            if ssl_options is None:
                self.log.critical(warning + " and not using encryption. This"
                                  "is not recommended.")
            if not self.password and not self.read_only:
                self.log.critical(
                    warning + "and not using authentication."
                    "This is highly insecure and not recommended.")
        success = None
        for port in random_ports(self.port, self.port_retries + 1):
            try:
                self.http_server.listen(port, self.ip)
            except socket.error as e:
                if e.errno != errno.EADDRINUSE:
                    raise
                self.log.info(
                    'The port %i is already in use, trying another random port.'
                    % port)
            else:
                self.port = port
                success = True
                break
        if not success:
            self.log.critical(
                'ERROR: the notebook server could not be started because '
                'no available port could be found.')
            self.exit(1)

    def init_signal(self):
        # FIXME: remove this check when pyzmq dependency is >= 2.1.11
        # safely extract zmq version info:
        try:
            zmq_v = zmq.pyzmq_version_info()
        except AttributeError:
            zmq_v = [int(n) for n in re.findall(r'\d+', zmq.__version__)]
            if 'dev' in zmq.__version__:
                zmq_v.append(999)
            zmq_v = tuple(zmq_v)
        if zmq_v >= (2, 1, 9) and not sys.platform.startswith('win'):
            # This won't work with 2.1.7 and
            # 2.1.9-10 will log ugly 'Interrupted system call' messages,
            # but it will work
            signal.signal(signal.SIGINT, self._handle_sigint)
        signal.signal(signal.SIGTERM, self._signal_stop)

    def _handle_sigint(self, sig, frame):
        """SIGINT handler spawns confirmation dialog"""
        # register more forceful signal handler for ^C^C case
        signal.signal(signal.SIGINT, self._signal_stop)
        # request confirmation dialog in bg thread, to avoid
        # blocking the App
        thread = threading.Thread(target=self._confirm_exit)
        thread.daemon = True
        thread.start()

    def _restore_sigint_handler(self):
        """callback for restoring original SIGINT handler"""
        signal.signal(signal.SIGINT, self._handle_sigint)

    def _confirm_exit(self):
        """confirm shutdown on ^C
        
        A second ^C, or answering 'y' within 5s will cause shutdown,
        otherwise original SIGINT handler will be restored.
        
        This doesn't work on Windows.
        """
        # FIXME: remove this delay when pyzmq dependency is >= 2.1.11
        time.sleep(0.1)
        sys.stdout.write("Shutdown Notebook Server (y/[n])? ")
        sys.stdout.flush()
        r, w, x = select.select([sys.stdin], [], [], 5)
        if r:
            line = sys.stdin.readline()
            if line.lower().startswith('y'):
                self.log.critical("Shutdown confirmed")
                ioloop.IOLoop.instance().stop()
                return
        else:
            print "No answer for 5s:",
        print "resuming operation..."
        # no answer, or answer is no:
        # set it back to original SIGINT handler
        # use IOLoop.add_callback because signal.signal must be called
        # from main thread
        ioloop.IOLoop.instance().add_callback(self._restore_sigint_handler)

    def _signal_stop(self, sig, frame):
        self.log.critical("received signal %s, stopping", sig)
        ioloop.IOLoop.instance().stop()

    @catch_config_error
    def initialize(self, argv=None):
        self.init_logging()
        super(NotebookApp, self).initialize(argv)
        self.init_configurables()
        self.init_webapp()
        self.init_signal()

    def cleanup_kernels(self):
        """Shutdown all kernels.
        
        The kernels will shutdown themselves when this process no longer exists,
        but explicit shutdown allows the KernelManagers to cleanup the connection files.
        """
        self.log.info('Shutting down kernels')
        self.kernel_manager.shutdown_all()

    def start(self):
        ip = self.ip if self.ip else '[all ip addresses on your system]'
        proto = 'https' if self.certfile else 'http'
        info = self.log.info
        info("The IPython Notebook is running at: %s://%s:%i%s" %
             (proto, ip, self.port, self.base_project_url))
        info("Use Control-C to stop this server and shut down all kernels.")

        if self.open_browser or self.file_to_run:
            ip = self.ip or LOCALHOST
            try:
                browser = webbrowser.get(self.browser or None)
            except webbrowser.Error as e:
                self.log.warn('No web browser found: %s.' % e)
                browser = None

            if self.file_to_run:
                name, _ = os.path.splitext(os.path.basename(self.file_to_run))
                url = self.notebook_manager.rev_mapping.get(name, '')
            else:
                url = ''
            if browser:
                b = lambda: browser.open("%s://%s:%i%s%s" % (
                    proto, ip, self.port, self.base_project_url, url),
                                         new=2)
                threading.Thread(target=b).start()
        try:
            ioloop.IOLoop.instance().start()
        except KeyboardInterrupt:
            info("Interrupted...")
        finally:
            self.cleanup_kernels()
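
# Editorial sketch (assumption, not part of the original source): a
# `random_ports` helper consistent with how the init_webapp retry loops above
# consume it.  It must yield the requested port first, then fallbacks; the
# real helper lives in IPython.utils and may differ in detail.
import random

def random_ports(port, n):
    """Yield `port`, a few sequential neighbors, then random nearby ports."""
    for i in range(min(5, n)):
        yield port + i
    for i in range(n - 5):
        yield max(1, port + random.randint(-2 * n, 2 * n))
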
Example #3
class ScriptMagics(Magics):
    """Magics for talking to scripts
    
    This defines a base `%%script` cell magic for running a cell
    with a program in a subprocess, and registers a few top-level
    magics that call %%script with common interpreters.
    """
    script_magics = List(
        config=True,
        help="""Extra script cell magics to define
        
        This generates simple wrappers of `%%script foo` as `%%foo`.
        
        If you want to add script magics that aren't on your path,
        specify them in script_paths
        """,
    )

    def _script_magics_default(self):
        """default to a common list of programs"""

        defaults = [
            'sh',
            'bash',
            'perl',
            'ruby',
            'python',
            'python2',
            'python3',
            'pypy',
        ]
        if os.name == 'nt':
            defaults.extend([
                'cmd',
            ])

        return defaults

    script_paths = Dict(
        config=True,
        help="""Dict mapping short names such as 'ruby' to full paths such as
        '/opt/secret/bin/ruby'.
        
        Only necessary for items in script_magics where the default path will not
        find the right interpreter.
        """)

    def __init__(self, shell=None):
        super(ScriptMagics, self).__init__(shell=shell)
        self._generate_script_magics()
        self.job_manager = BackgroundJobManager()
        self.bg_processes = []
        atexit.register(self.kill_bg_processes)

    def __del__(self):
        self.kill_bg_processes()

    def _generate_script_magics(self):
        cell_magics = self.magics['cell']
        for name in self.script_magics:
            cell_magics[name] = self._make_script_magic(name)

    def _make_script_magic(self, name):
        """make a named magic, that calls %%script with a particular program"""
        # expand to explicit path if necessary:
        script = self.script_paths.get(name, name)

        @magic_arguments.magic_arguments()
        @script_args
        def named_script_magic(line, cell):
            # if line, add it as cl-flags
            if line:
                line = "%s %s" % (script, line)
            else:
                line = script
            return self.shebang(line, cell)

        # write a basic docstring:
        named_script_magic.__doc__ = \
        """%%{name} script magic
        
        Run cells with {script} in a subprocess.
        
        This is a shortcut for `%%script {script}`
        """.format(**locals())

        return named_script_magic

    @magic_arguments.magic_arguments()
    @script_args
    @cell_magic("script")
    def shebang(self, line, cell):
        """Run a cell via a shell command
        
        The `%%script` line is like the #! line of a script,
        specifying a program (bash, perl, ruby, etc.) with which to run.
        
        The rest of the cell is run by that program.
        
        Examples
        --------
        ::
        
            In [1]: %%script bash
               ...: for i in 1 2 3; do
               ...:   echo $i
               ...: done
            1
            2
            3
        """
        argv = arg_split(line, posix=not sys.platform.startswith('win'))
        args, cmd = self.shebang.parser.parse_known_args(argv)

        try:
            p = Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=PIPE)
        except OSError as e:
            if e.errno == errno.ENOENT:
                print("Couldn't find program: %r" % cmd[0])
                return
            else:
                raise

        if not cell.endswith('\n'):
            cell += '\n'
        cell = cell.encode('utf8', 'replace')
        if args.bg:
            self.bg_processes.append(p)
            self._gc_bg_processes()
            if args.out:
                self.shell.user_ns[args.out] = p.stdout
            if args.err:
                self.shell.user_ns[args.err] = p.stderr
            self.job_manager.new(self._run_script, p, cell, daemon=True)
            if args.proc:
                self.shell.user_ns[args.proc] = p
            return

        try:
            out, err = p.communicate(cell)
        except KeyboardInterrupt:
            try:
                p.send_signal(signal.SIGINT)
                time.sleep(0.1)
                if p.poll() is not None:
                    print("Process is interrupted.")
                    return
                p.terminate()
                time.sleep(0.1)
                if p.poll() is not None:
                    print("Process is terminated.")
                    return
                p.kill()
                print("Process is killed.")
            except OSError:
                pass
            except Exception as e:
                print("Error while terminating subprocess (pid=%i): %s" \
                    % (p.pid, e))
            return
        out = py3compat.bytes_to_str(out)
        err = py3compat.bytes_to_str(err)
        if args.out:
            self.shell.user_ns[args.out] = out
        else:
            sys.stdout.write(out)
            sys.stdout.flush()
        if args.err:
            self.shell.user_ns[args.err] = err
        else:
            sys.stderr.write(err)
            sys.stderr.flush()

    def _run_script(self, p, cell):
        """callback for running the script in the background"""
        p.stdin.write(cell)
        p.stdin.close()
        p.wait()

    @line_magic("killbgscripts")
    def killbgscripts(self, _nouse_=''):
        """Kill all BG processes started by %%script and its family."""
        self.kill_bg_processes()
        print("All background processes were killed.")

    def kill_bg_processes(self):
        """Kill all BG processes which are still running."""
        for p in self.bg_processes:
            if p.poll() is None:
                try:
                    p.send_signal(signal.SIGINT)
                except:
                    pass
        time.sleep(0.1)
        for p in self.bg_processes:
            if p.poll() is None:
                try:
                    p.terminate()
                except:
                    pass
        time.sleep(0.1)
        for p in self.bg_processes:
            if p.poll() is None:
                try:
                    p.kill()
                except:
                    pass
        self._gc_bg_processes()

    def _gc_bg_processes(self):
        self.bg_processes = [p for p in self.bg_processes if p.poll() is None]
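
# Editorial usage sketch (not part of the original source): how the generated
# magics behave in a session; `--out` stores captured stdout in the user
# namespace, per the args.out branch in `shebang`:
#
#   In [1]: %%script bash --out hello_out
#      ...: echo "hello from bash"
#
#   In [2]: print(hello_out)
#   hello from bash
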
Example #4
class Kernel(Configurable):

    #---------------------------------------------------------------------------
    # Kernel interface
    #---------------------------------------------------------------------------

    # attribute to override with a GUI
    eventloop = Any(None)

    def _eventloop_changed(self, name, old, new):
        """schedule call to eventloop from IOLoop"""
        loop = ioloop.IOLoop.instance()
        loop.add_callback(self.enter_eventloop)

    session = Instance(Session)
    profile_dir = Instance('IPython.core.profiledir.ProfileDir')
    shell_streams = List()
    control_stream = Instance(ZMQStream)
    iopub_socket = Instance(zmq.Socket)
    stdin_socket = Instance(zmq.Socket)
    log = Instance(logging.Logger)

    # identities:
    int_id = Integer(-1)
    ident = Unicode()

    def _ident_default(self):
        return unicode_type(uuid.uuid4())

    # Private interface

    _darwin_app_nap = Bool(
        True,
        config=True,
        help="""Whether to use appnope for compatiblity with OS X App Nap.
        
        Only affects OS X >= 10.9.
        """)

    # track associations with current request
    _allow_stdin = Bool(False)
    _parent_header = Dict()
    _parent_ident = Any(b'')
    # Time to sleep after flushing the stdout/err buffers in each execute
    # cycle.  While this introduces a hard limit on the minimal latency of the
    # execute cycle, it helps prevent output synchronization problems for
    # clients.
    # Units are in seconds.  The minimum zmq latency on local host is probably
    # ~150 microseconds, set this to 500us for now.  We may need to increase it
    # a little if it's not enough after more interactive testing.
    _execute_sleep = Float(0.0005, config=True)

    # Frequency of the kernel's event loop.
    # Units are in seconds, kernel subclasses for GUI toolkits may need to
    # adapt to milliseconds.
    _poll_interval = Float(0.05, config=True)

    # If the shutdown was requested over the network, we leave here the
    # necessary reply message so it can be sent by our registered atexit
    # handler.  This ensures that the reply is only sent to clients truly at
    # the end of our shutdown process (which happens after the underlying
    # IPython shell's own shutdown).
    _shutdown_message = None

    # This is a dict of the port numbers that the kernel is listening on. It
    # is set by record_ports and used by connect_request.
    _recorded_ports = Dict()

    # set of aborted msg_ids
    aborted = Set()

    # Track execution count here. For IPython, we override this to use the
    # execution count we store in the shell.
    execution_count = 0

    def __init__(self, **kwargs):
        super(Kernel, self).__init__(**kwargs)

        # Build dict of handlers for message types
        msg_types = [
            'execute_request',
            'complete_request',
            'inspect_request',
            'history_request',
            'kernel_info_request',
            'connect_request',
            'shutdown_request',
            'apply_request',
        ]
        self.shell_handlers = {}
        for msg_type in msg_types:
            self.shell_handlers[msg_type] = getattr(self, msg_type)

        control_msg_types = msg_types + ['clear_request', 'abort_request']
        self.control_handlers = {}
        for msg_type in control_msg_types:
            self.control_handlers[msg_type] = getattr(self, msg_type)
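
    # Editorial sketch (not in the original source): a subclass adding a
    # custom message type would extend these tables the same way; the name
    # below is hypothetical:
    #
    #     self.shell_handlers['my_request'] = self.my_request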

    def dispatch_control(self, msg):
        """dispatch control requests"""
        idents, msg = self.session.feed_identities(msg, copy=False)
        try:
            msg = self.session.unserialize(msg, content=True, copy=False)
        except:
            self.log.error("Invalid Control Message", exc_info=True)
            return

        self.log.debug("Control received: %s", msg)

        # Set the parent message for side effects.
        self.set_parent(idents, msg)
        self._publish_status(u'busy')

        header = msg['header']
        msg_type = header['msg_type']

        handler = self.control_handlers.get(msg_type, None)
        if handler is None:
            self.log.error("UNKNOWN CONTROL MESSAGE TYPE: %r", msg_type)
        else:
            try:
                handler(self.control_stream, idents, msg)
            except Exception:
                self.log.error("Exception in control handler:", exc_info=True)

        sys.stdout.flush()
        sys.stderr.flush()
        self._publish_status(u'idle')

    def dispatch_shell(self, stream, msg):
        """dispatch shell requests"""
        # flush control requests first
        if self.control_stream:
            self.control_stream.flush()

        idents, msg = self.session.feed_identities(msg, copy=False)
        try:
            msg = self.session.unserialize(msg, content=True, copy=False)
        except:
            self.log.error("Invalid Message", exc_info=True)
            return

        # Set the parent message for side effects.
        self.set_parent(idents, msg)
        self._publish_status(u'busy')

        header = msg['header']
        msg_id = header['msg_id']
        msg_type = msg['header']['msg_type']

        # Print some info about this message and leave a '--->' marker, so it's
        # easier to trace visually the message chain when debugging.  Each
        # handler prints its message at the end.
        self.log.debug('\n*** MESSAGE TYPE:%s***', msg_type)
        self.log.debug('   Content: %s\n   --->\n   ', msg['content'])

        if msg_id in self.aborted:
            self.aborted.remove(msg_id)
            # is it safe to assume a msg_id will not be resubmitted?
            reply_type = msg_type.split('_')[0] + '_reply'
            status = {'status': 'aborted'}
            md = {'engine': self.ident}
            md.update(status)
            self.session.send(stream,
                              reply_type,
                              metadata=md,
                              content=status,
                              parent=msg,
                              ident=idents)
            return

        handler = self.shell_handlers.get(msg_type, None)
        if handler is None:
            self.log.error("UNKNOWN MESSAGE TYPE: %r", msg_type)
        else:
            # ensure default_int_handler during handler call
            sig = signal(SIGINT, default_int_handler)
            self.log.debug("%s: %s", msg_type, msg)
            try:
                handler(stream, idents, msg)
            except Exception:
                self.log.error("Exception in message handler:", exc_info=True)
            finally:
                signal(SIGINT, sig)

        sys.stdout.flush()
        sys.stderr.flush()
        self._publish_status(u'idle')

    def enter_eventloop(self):
        """enter eventloop"""
        self.log.info("entering eventloop %s", self.eventloop)
        for stream in self.shell_streams:
            # flush any pending replies,
            # which may be skipped by entering the eventloop
            stream.flush(zmq.POLLOUT)
        # restore default_int_handler
        signal(SIGINT, default_int_handler)
        while self.eventloop is not None:
            try:
                self.eventloop(self)
            except KeyboardInterrupt:
                # Ctrl-C shouldn't crash the kernel
                self.log.error("KeyboardInterrupt caught in kernel")
                continue
            else:
                # eventloop exited cleanly, this means we should stop (right?)
                self.eventloop = None
                break
        self.log.info("exiting eventloop")

    def start(self):
        """register dispatchers for streams"""
        if self.control_stream:
            self.control_stream.on_recv(self.dispatch_control, copy=False)

        def make_dispatcher(stream):
            def dispatcher(msg):
                return self.dispatch_shell(stream, msg)

            return dispatcher

        for s in self.shell_streams:
            s.on_recv(make_dispatcher(s), copy=False)

        # publish idle status
        self._publish_status('starting')

    def do_one_iteration(self):
        """step eventloop just once"""
        if self.control_stream:
            self.control_stream.flush()
        for stream in self.shell_streams:
            # handle at most one request per iteration
            stream.flush(zmq.POLLIN, 1)
            stream.flush(zmq.POLLOUT)

    def record_ports(self, ports):
        """Record the ports that this kernel is using.

        The creator of the Kernel instance must call this method if they
        want the :meth:`connect_request` method to return the port numbers.
        """
        self._recorded_ports = ports

    #---------------------------------------------------------------------------
    # Kernel request handlers
    #---------------------------------------------------------------------------

    def _make_metadata(self, other=None):
        """init metadata dict, for execute/apply_reply"""
        new_md = {
            'dependencies_met': True,
            'engine': self.ident,
            'started': datetime.now(),
        }
        if other:
            new_md.update(other)
        return new_md

    def _publish_execute_input(self, code, parent, execution_count):
        """Publish the code request on the iopub stream."""

        self.session.send(self.iopub_socket,
                          u'execute_input', {
                              u'code': code,
                              u'execution_count': execution_count
                          },
                          parent=parent,
                          ident=self._topic('execute_input'))

    def _publish_status(self, status, parent=None):
        """send status (busy/idle) on IOPub"""
        self.session.send(
            self.iopub_socket,
            u'status',
            {u'execution_state': status},
            parent=parent or self._parent_header,
            ident=self._topic('status'),
        )

    def set_parent(self, ident, parent):
        """Set the current parent_header
        
        Side effects (IOPub messages) and replies are associated with
        the request that caused them via the parent_header.
        
        The parent identity is used to route input_request messages
        on the stdin channel.
        """
        self._parent_ident = ident
        self._parent_header = parent

    def send_response(self,
                      stream,
                      msg_or_type,
                      content=None,
                      ident=None,
                      buffers=None,
                      track=False,
                      header=None,
                      metadata=None):
        """Send a response to the message we're currently processing.
        
        This accepts all the parameters of :meth:`IPython.kernel.zmq.session.Session.send`
        except ``parent``.
        
        This relies on :meth:`set_parent` having been called for the current
        message.
        """
        return self.session.send(stream, msg_or_type, content,
                                 self._parent_header, ident, buffers, track,
                                 header, metadata)
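
    # Editorial usage sketch (not in the original source): a handler typically
    # replies on IOPub with, e.g., a 'stream' message whose content follows
    # the messaging spec:
    #
    #     self.send_response(self.iopub_socket, 'stream',
    #                        {'name': 'stdout', 'text': 'hello\n'})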

    def execute_request(self, stream, ident, parent):
        """handle an execute_request"""

        try:
            content = parent[u'content']
            code = py3compat.cast_unicode_py2(content[u'code'])
            silent = content[u'silent']
            store_history = content.get(u'store_history', not silent)
            user_expressions = content.get('user_expressions', {})
            allow_stdin = content.get('allow_stdin', False)
        except Exception:
            self.log.error("Got bad msg: %s", parent, exc_info=True)
            return

        md = self._make_metadata(parent['metadata'])

        # Re-broadcast our input for the benefit of listening clients, and
        # start computing output
        if not silent:
            self.execution_count += 1
            self._publish_execute_input(code, parent, self.execution_count)

        reply_content = self.do_execute(code, silent, store_history,
                                        user_expressions, allow_stdin)

        # Flush output before sending the reply.
        sys.stdout.flush()
        sys.stderr.flush()
        # FIXME: on rare occasions, the flush doesn't seem to make it to the
        # clients... This seems to mitigate the problem, but we definitely need
        # to better understand what's going on.
        if self._execute_sleep:
            time.sleep(self._execute_sleep)

        # Send the reply.
        reply_content = json_clean(reply_content)

        md['status'] = reply_content['status']
        if reply_content['status'] == 'error' and \
                        reply_content['ename'] == 'UnmetDependency':
            md['dependencies_met'] = False

        reply_msg = self.session.send(stream,
                                      u'execute_reply',
                                      reply_content,
                                      parent,
                                      metadata=md,
                                      ident=ident)

        self.log.debug("%s", reply_msg)

        if not silent and reply_msg['content']['status'] == u'error':
            self._abort_queues()

    def do_execute(self,
                   code,
                   silent,
                   store_history=True,
                   user_expressions=None,
                   allow_stdin=False):
        """Execute user code. Must be overridden by subclasses.
        """
        raise NotImplementedError
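
    # Reply-shape sketch (hedged): a concrete do_execute() is expected to
    # return an execute_reply content dict; a minimal successful reply could
    # look like this (see the messaging spec for the full set of fields):
    #
    #     return {'status': 'ok',
    #             'execution_count': self.execution_count,
    #             'payload': [],
    #             'user_expressions': {}}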

    def complete_request(self, stream, ident, parent):
        content = parent['content']
        code = content['code']
        cursor_pos = content['cursor_pos']

        matches = self.do_complete(code, cursor_pos)
        matches = json_clean(matches)
        completion_msg = self.session.send(stream, 'complete_reply', matches,
                                           parent, ident)
        self.log.debug("%s", completion_msg)

    def do_complete(self, code, cursor_pos):
        """Override in subclasses to find completions.
        """
        return {
            'matches': [],
            'cursor_end': cursor_pos,
            'cursor_start': cursor_pos,
            'metadata': {},
            'status': 'ok'
        }

    def inspect_request(self, stream, ident, parent):
        content = parent['content']

        reply_content = self.do_inspect(content['code'], content['cursor_pos'],
                                        content.get('detail_level', 0))
        # Before we send this object over, we scrub it for JSON usage
        reply_content = json_clean(reply_content)
        msg = self.session.send(stream, 'inspect_reply', reply_content, parent,
                                ident)
        self.log.debug("%s", msg)

    def do_inspect(self, code, cursor_pos, detail_level=0):
        """Override in subclasses to allow introspection.
        """
        return {'status': 'ok', 'data': {}, 'metadata': {}, 'found': False}

    def history_request(self, stream, ident, parent):
        content = parent['content']

        reply_content = self.do_history(**content)

        reply_content = json_clean(reply_content)
        msg = self.session.send(stream, 'history_reply', reply_content, parent,
                                ident)
        self.log.debug("%s", msg)

    def do_history(self,
                   hist_access_type,
                   output,
                   raw,
                   session=None,
                   start=None,
                   stop=None,
                   n=None,
                   pattern=None,
                   unique=False):
        """Override in subclasses to access history.
        """
        return {'history': []}

    def connect_request(self, stream, ident, parent):
        if self._recorded_ports is not None:
            content = self._recorded_ports.copy()
        else:
            content = {}
        msg = self.session.send(stream, 'connect_reply', content, parent,
                                ident)
        self.log.debug("%s", msg)

    @property
    def kernel_info(self):
        return {
            'protocol_version': release.kernel_protocol_version,
            'implementation': self.implementation,
            'implementation_version': self.implementation_version,
            'language': self.language,
            'language_version': self.language_version,
            'banner': self.banner,
        }

    def kernel_info_request(self, stream, ident, parent):
        msg = self.session.send(stream, 'kernel_info_reply', self.kernel_info,
                                parent, ident)
        self.log.debug("%s", msg)

    def shutdown_request(self, stream, ident, parent):
        content = self.do_shutdown(parent['content']['restart'])
        self.session.send(stream,
                          u'shutdown_reply',
                          content,
                          parent,
                          ident=ident)
        # same content, but different msg_id for broadcasting on IOPub
        self._shutdown_message = self.session.msg(u'shutdown_reply', content,
                                                  parent)

        self._at_shutdown()
        # call sys.exit after a short delay
        loop = ioloop.IOLoop.instance()
        loop.add_timeout(time.time() + 0.1, loop.stop)

    def do_shutdown(self, restart):
        """Override in subclasses to do things when the frontend shuts down the
        kernel.
        """
        return {'status': 'ok', 'restart': restart}

    #---------------------------------------------------------------------------
    # Engine methods
    #---------------------------------------------------------------------------

    def apply_request(self, stream, ident, parent):
        try:
            content = parent[u'content']
            bufs = parent[u'buffers']
            msg_id = parent['header']['msg_id']
        except Exception:
            self.log.error("Got bad msg: %s", parent, exc_info=True)
            return

        md = self._make_metadata(parent['metadata'])

        reply_content, result_buf = self.do_apply(content, bufs, msg_id, md)

        # put 'ok'/'error' status in header, for scheduler introspection:
        md['status'] = reply_content['status']

        # flush i/o
        sys.stdout.flush()
        sys.stderr.flush()

        self.session.send(stream,
                          u'apply_reply',
                          reply_content,
                          parent=parent,
                          ident=ident,
                          buffers=result_buf,
                          metadata=md)

    def do_apply(self, content, bufs, msg_id, reply_metadata):
        """Override in subclasses to support the IPython parallel framework.
        """
        raise NotImplementedError

    #---------------------------------------------------------------------------
    # Control messages
    #---------------------------------------------------------------------------

    def abort_request(self, stream, ident, parent):
        """abort a specifig msg by id"""
        msg_ids = parent['content'].get('msg_ids', None)
        if isinstance(msg_ids, string_types):
            msg_ids = [msg_ids]
        if not msg_ids:
            self._abort_queues()
        for mid in msg_ids:
            self.aborted.add(str(mid))

        content = dict(status='ok')
        reply_msg = self.session.send(stream,
                                      'abort_reply',
                                      content=content,
                                      parent=parent,
                                      ident=ident)
        self.log.debug("%s", reply_msg)

    def clear_request(self, stream, idents, parent):
        """Clear our namespace."""
        content = self.do_clear()
        self.session.send(stream,
                          'clear_reply',
                          ident=idents,
                          parent=parent,
                          content=content)

    def do_clear(self):
        """Override in subclasses to clear the namespace
        
        This is only required for IPython.parallel.
        """
        raise NotImplementedError

    #---------------------------------------------------------------------------
    # Protected interface
    #---------------------------------------------------------------------------

    def _topic(self, topic):
        """prefixed topic for IOPub messages"""
        if self.int_id >= 0:
            base = "engine.%i" % self.int_id
        else:
            base = "kernel.%s" % self.ident

        return py3compat.cast_bytes("%s.%s" % (base, topic))
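
    # Example (hedged): for a kernel whose ident is e.g. 'f3b2...', calling
    # self._topic('status') returns roughly b'kernel.f3b2....status', which
    # IOPub subscribers can use as a ZMQ topic prefix.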

    def _abort_queues(self):
        for stream in self.shell_streams:
            if stream:
                self._abort_queue(stream)

    def _abort_queue(self, stream):
        poller = zmq.Poller()
        poller.register(stream.socket, zmq.POLLIN)
        while True:
            idents, msg = self.session.recv(stream, zmq.NOBLOCK, content=True)
            if msg is None:
                return

            self.log.info("Aborting:")
            self.log.info("%s", msg)
            msg_type = msg['header']['msg_type']
            reply_type = msg_type.split('_')[0] + '_reply'

            status = {'status': 'aborted'}
            md = {'engine': self.ident}
            md.update(status)
            reply_msg = self.session.send(stream,
                                          reply_type,
                                          metadata=md,
                                          content=status,
                                          parent=msg,
                                          ident=idents)
            self.log.debug("%s", reply_msg)
            # We need to wait a bit for requests to come in. This can probably
            # be set shorter for true asynchronous clients.
            poller.poll(50)

    def _no_raw_input(self):
        """Raise StdinNotImplentedError if active frontend doesn't support
        stdin."""
        raise StdinNotImplementedError("raw_input was called, but this "
                                       "frontend does not support stdin.")

    def getpass(self, prompt=''):
        """Forward getpass to frontends
        
        Raises
        ------
        StdinNotImplementedError if active frontend doesn't support stdin.
        """
        if not self._allow_stdin:
            raise StdinNotImplementedError(
                "getpass was called, but this frontend does not support input requests."
            )
        return self._input_request(
            prompt,
            self._parent_ident,
            self._parent_header,
            password=True,
        )

    def raw_input(self, prompt=''):
        """Forward raw_input to frontends
        
        Raises
        ------
        StdinNotImplementedError if active frontend doesn't support stdin.
        """
        if not self._allow_stdin:
            raise StdinNotImplementedError(
                "raw_input was called, but this frontend does not support input requests."
            )
        return self._input_request(
            prompt,
            self._parent_ident,
            self._parent_header,
            password=False,
        )

    def _input_request(self, prompt, ident, parent, password=False):
        # Flush output before making the request.
        sys.stderr.flush()
        sys.stdout.flush()
        # flush the stdin socket, to purge stale replies
        while True:
            try:
                self.stdin_socket.recv_multipart(zmq.NOBLOCK)
            except zmq.ZMQError as e:
                if e.errno == zmq.EAGAIN:
                    break
                else:
                    raise

        # Send the input request.
        content = json_clean(dict(prompt=prompt, password=password))
        self.session.send(self.stdin_socket,
                          u'input_request',
                          content,
                          parent,
                          ident=ident)

        # Await a response.
        while True:
            try:
                ident, reply = self.session.recv(self.stdin_socket, 0)
            except Exception:
                self.log.warn("Invalid Message:", exc_info=True)
            except KeyboardInterrupt:
                # re-raise KeyboardInterrupt, to truncate traceback
                raise KeyboardInterrupt
            else:
                break
        try:
            value = py3compat.unicode_to_str(reply['content']['value'])
        except Exception:
            self.log.error("Bad input_reply: %s", parent)
            value = ''
        if value == '\x04':
            # EOF
            raise EOFError
        return value

    def _at_shutdown(self):
        """Actions taken at shutdown by the kernel, called by python's atexit.
        """
        # io.rprint("Kernel at_shutdown") # dbg
        if self._shutdown_message is not None:
            self.session.send(self.iopub_socket,
                              self._shutdown_message,
                              ident=self._topic('shutdown'))
            self.log.debug("%s", self._shutdown_message)
        for s in self.shell_streams:
            s.flush(zmq.POLLOUT)
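

# A minimal wrapper-kernel sketch (hedged): it exercises the do_execute()
# contract and the send_response() helper shown above.  `Kernel` is assumed
# to be the name of the class defined above; EchoKernel, its version
# strings, and its banner are illustrative only.
class EchoKernel(Kernel):
    implementation = 'echo'
    implementation_version = '0.1'
    language = 'text'
    language_version = '0.1'
    banner = 'Echo kernel: repeats whatever you execute'

    def do_execute(self, code, silent, store_history=True,
                   user_expressions=None, allow_stdin=False):
        if not silent:
            # re-broadcast the code on IOPub as a stdout stream message
            self.send_response(self.iopub_socket, 'stream',
                               {'name': 'stdout', 'text': code})
        return {'status': 'ok',
                'execution_count': self.execution_count,
                'payload': [],
                'user_expressions': {}}
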
class IPythonConsoleApp(Configurable):
    name = 'ipython-console-mixin'
    default_config_file_name = 'ipython_config.py'

    description = """
        The IPython Mixin Console.
        
        This class contains the common portions of console client (QtConsole,
        ZMQ-based terminal console, etc).  It is not a full console, in that
        launched terminal subprocesses will not be able to accept input.
        
        The Console using this mixin supports various extra features beyond
        the single-process Terminal IPython shell, such as connecting to an
        existing kernel, via:
        
            ipython <appname> --existing
        
        as well as tunneling via SSH.
        
    """

    classes = classes
    flags = Dict(flags)
    aliases = Dict(aliases)
    kernel_manager_class = BlockingKernelManager

    kernel_argv = List(Unicode)
    # frontend flags&aliases to be stripped when building kernel_argv
    frontend_flags = Any(app_flags)
    frontend_aliases = Any(app_aliases)

    # create requested profiles by default, if they don't exist:
    auto_create = CBool(True)
    # connection info:
    ip = Unicode(LOCALHOST,
                 config=True,
                 help="""Set the kernel\'s IP address [default localhost].
        If the IP address is something other than localhost, then
        Consoles on other machines will be able to connect
        to the Kernel, so be careful!""")

    sshserver = Unicode(
        '',
        config=True,
        help="""The SSH server to use to connect to the kernel.""")
    sshkey = Unicode(
        '',
        config=True,
        help="""Path to the ssh key to use for logging in to the ssh server."""
    )

    hb_port = Int(0,
                  config=True,
                  help="set the heartbeat port [default: random]")
    shell_port = Int(0,
                     config=True,
                     help="set the shell (ROUTER) port [default: random]")
    iopub_port = Int(0,
                     config=True,
                     help="set the iopub (PUB) port [default: random]")
    stdin_port = Int(0,
                     config=True,
                     help="set the stdin (DEALER) port [default: random]")
    connection_file = Unicode(
        '',
        config=True,
        help=
        """JSON file in which to store connection info [default: kernel-<pid>.json]

        This file will contain the IP, ports, and authentication key needed to connect
        clients to this kernel. By default, this file will be created in the security-dir
        of the current profile, but can be specified by absolute path.
        """)

    def _connection_file_default(self):
        return 'kernel-%i.json' % os.getpid()

    existing = CUnicode('',
                        config=True,
                        help="""Connect to an already running kernel""")

    confirm_exit = CBool(
        True,
        config=True,
        help="""
        Set to display confirmation dialog on exit. You can always use 'exit'
        or 'quit' to force a direct exit without any confirmation.""",
    )

    def build_kernel_argv(self, argv=None):
        """build argv to be passed to kernel subprocess"""
        if argv is None:
            argv = sys.argv[1:]
        self.kernel_argv = swallow_argv(argv, self.frontend_aliases,
                                        self.frontend_flags)
        # kernel should inherit default config file from frontend
        self.kernel_argv.append("--KernelApp.parent_appname='%s'" % self.name)
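
    # Behavior sketch (hedged): swallow_argv() strips flags/aliases that only
    # the frontend understands, so for argv like
    #     ['--no-confirm-exit', '--profile=demo', 'notebook.py']
    # kernel_argv would keep roughly ['--profile=demo', 'notebook.py'] plus
    # the --KernelApp.parent_appname option appended above.  The exact set
    # stripped depends on frontend_flags and frontend_aliases.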

    def init_connection_file(self):
        """find the connection file, and load the info if found.
        
        The current working directory and the current profile's security
        directory will be searched for the file if it is not given by
        absolute path.
        
        When attempting to connect to an existing kernel and the `--existing`
        argument does not match an existing file, it will be interpreted as a
        fileglob, and the matching file in the current profile's security dir
        with the latest access time will be used.
        
        After this method is called, self.connection_file contains the *full path*
        to the connection file, never just its name.
        """
        if self.existing:
            try:
                cf = find_connection_file(self.existing)
            except Exception:
                self.log.critical(
                    "Could not find existing kernel connection file %s",
                    self.existing)
                self.exit(1)
            self.log.info("Connecting to existing kernel: %s" % cf)
            self.connection_file = cf
        else:
            # not existing, check if we are going to write the file
            # and ensure that self.connection_file is a full path, not just the shortname
            try:
                cf = find_connection_file(self.connection_file)
            except Exception:
                # file might not exist
                if self.connection_file == os.path.basename(
                        self.connection_file):
                    # just shortname, put it in security dir
                    cf = os.path.join(self.profile_dir.security_dir,
                                      self.connection_file)
                else:
                    cf = self.connection_file
                self.connection_file = cf

        # should load_connection_file only be used for existing?
        # as it is now, this allows reusing ports if an existing
        # file is requested
        try:
            self.load_connection_file()
        except Exception:
            self.log.error("Failed to load connection file: %r",
                           self.connection_file,
                           exc_info=True)
            self.exit(1)

    def load_connection_file(self):
        """load ip/port/hmac config from JSON connection file"""
        # this is identical to KernelApp.load_connection_file
        # perhaps it can be centralized somewhere?
        try:
            fname = filefind(self.connection_file,
                             ['.', self.profile_dir.security_dir])
        except IOError:
            self.log.debug("Connection File not found: %s",
                           self.connection_file)
            return
        self.log.debug("Loading connection file %s", fname)
        with open(fname) as f:
            s = f.read()
        cfg = json.loads(s)
        if self.ip == LOCALHOST and 'ip' in cfg:
            # not overridden by config or cl_args
            self.ip = cfg['ip']
        for channel in ('hb', 'shell', 'iopub', 'stdin'):
            name = channel + '_port'
            if getattr(self, name) == 0 and name in cfg:
                # not overridden by config or cl_args
                setattr(self, name, cfg[name])
        if 'key' in cfg:
            self.config.Session.key = str_to_bytes(cfg['key'])
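
    # Connection-file sketch (hedged): the JSON read above typically has a
    # shape like the following; the ports and key are placeholders:
    #
    #     {
    #       "ip": "127.0.0.1",
    #       "shell_port": 53794,
    #       "iopub_port": 53795,
    #       "stdin_port": 53796,
    #       "hb_port": 53797,
    #       "key": "a0436f6c-1916-498b-8eb9-e81ab9368e84"
    #     }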

    def init_ssh(self):
        """set up ssh tunnels, if needed."""
        if not self.sshserver and not self.sshkey:
            return

        if self.sshkey and not self.sshserver:
            # specifying just the key implies that we are connecting directly
            self.sshserver = self.ip
            self.ip = LOCALHOST

        # build connection dict for tunnels:
        info = dict(ip=self.ip,
                    shell_port=self.shell_port,
                    iopub_port=self.iopub_port,
                    stdin_port=self.stdin_port,
                    hb_port=self.hb_port)

        self.log.info("Forwarding connections to %s via %s" %
                      (self.ip, self.sshserver))

        # tunnels return a new set of ports, which will be on localhost:
        self.ip = LOCALHOST
        try:
            newports = tunnel_to_kernel(info, self.sshserver, self.sshkey)
        except:
            # even catch KeyboardInterrupt
            self.log.error("Could not set up tunnels", exc_info=True)
            self.exit(1)

        self.shell_port, self.iopub_port, self.stdin_port, self.hb_port = newports

        cf = self.connection_file
        base, ext = os.path.splitext(cf)
        base = os.path.basename(base)
        self.connection_file = base + '-ssh' + ext
        self.log.critical("To connect another client via this tunnel, use:")
        self.log.critical("--existing %s" % self.connection_file)

    def _new_connection_file(self):
        cf = ''
        while not cf:
            # we don't need a 128b id to distinguish kernels, use more readable
            # 48b node segment (12 hex chars).  Users running more than 32k simultaneous
            # kernels can subclass.
            ident = str(uuid.uuid4()).split('-')[-1]
            cf = os.path.join(self.profile_dir.security_dir,
                              'kernel-%s.json' % ident)
            # only keep if it's actually new.  Protect against unlikely collision
            # in 48b random search space
            cf = cf if not os.path.exists(cf) else ''
        return cf

    def init_kernel_manager(self):
        # Don't let Qt or ZMQ swallow KeyboardInterrupts.
        signal.signal(signal.SIGINT, signal.SIG_DFL)

        # Create a KernelManager and start a kernel.
        self.kernel_manager = self.kernel_manager_class(
            ip=self.ip,
            shell_port=self.shell_port,
            iopub_port=self.iopub_port,
            stdin_port=self.stdin_port,
            hb_port=self.hb_port,
            connection_file=self.connection_file,
            config=self.config,
        )
        # start the kernel
        if not self.existing:
            self.kernel_manager.start_kernel(extra_arguments=self.kernel_argv)
        elif self.sshserver:
            # ssh, write new connection file
            self.kernel_manager.write_connection_file()
        atexit.register(self.kernel_manager.cleanup_connection_file)
        self.kernel_manager.start_channels()

    def initialize(self, argv=None):
        """
        Classes which mix this class in should call:
               IPythonConsoleApp.initialize(self,argv)
        """
        self.init_connection_file()
        default_secure(self.config)
        self.init_ssh()
        self.init_kernel_manager()
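

# A hedged sketch of how a frontend mixes this class in; MyConsoleApp is an
# illustrative name, and BaseIPythonApplication is assumed to provide the
# usual profile/config machinery.
class MyConsoleApp(BaseIPythonApplication, IPythonConsoleApp):
    def initialize(self, argv=None):
        super(MyConsoleApp, self).initialize(argv)
        self.build_kernel_argv(argv)
        IPythonConsoleApp.initialize(self, argv)
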
Example #6
class ControllerFactory(HubFactory):
    """Configurable for setting up a Hub and Schedulers."""

    usethreads = Bool(False, config=True)

    # internal
    children = List()
    mq_class = CStr('zmq.devices.ProcessMonitoredQueue')

    def _usethreads_changed(self, name, old, new):
        self.mq_class = 'zmq.devices.%sMonitoredQueue' % ('Thread' if new else
                                                          'Process')
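
    # Effect sketch (hedged): setting usethreads=True flips mq_class to
    # 'zmq.devices.ThreadMonitoredQueue', so the queues built below in
    # construct_schedulers() run as threads rather than subprocesses.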

    def __init__(self, **kwargs):
        super(ControllerFactory, self).__init__(**kwargs)
        self.subconstructors.append(self.construct_schedulers)

    def start(self):
        super(ControllerFactory, self).start()
        child_procs = []
        for child in self.children:
            child.start()
            if isinstance(child, ProcessMonitoredQueue):
                child_procs.append(child.launcher)
            elif isinstance(child, Process):
                child_procs.append(child)
        if child_procs:
            signal_children(child_procs)

    def construct_schedulers(self):
        children = self.children
        mq = import_item(self.mq_class)

        # maybe_inproc = 'inproc://monitor' if self.usethreads else self.monitor_url
        # IOPub relay (in a Process)
        q = mq(zmq.PUB, zmq.SUB, zmq.PUB, 'N/A', 'iopub')
        q.bind_in(self.client_info['iopub'])
        q.bind_out(self.engine_info['iopub'])
        q.setsockopt_out(zmq.SUBSCRIBE, '')
        q.connect_mon(self.monitor_url)
        q.daemon = True
        children.append(q)

        # Multiplexer Queue (in a Process)
        q = mq(zmq.XREP, zmq.XREP, zmq.PUB, 'in', 'out')
        q.bind_in(self.client_info['mux'])
        q.setsockopt_in(zmq.IDENTITY, 'mux')
        q.bind_out(self.engine_info['mux'])
        q.connect_mon(self.monitor_url)
        q.daemon = True
        children.append(q)

        # Control Queue (in a Process)
        q = mq(zmq.XREP, zmq.XREP, zmq.PUB, 'incontrol', 'outcontrol')
        q.bind_in(self.client_info['control'])
        q.setsockopt_in(zmq.IDENTITY, 'control')
        q.bind_out(self.engine_info['control'])
        q.connect_mon(self.monitor_url)
        q.daemon = True
        children.append(q)
        # Task Queue (in a Process)
        if self.scheme == 'pure':
            self.log.warn("task::using pure XREQ Task scheduler")
            q = mq(zmq.XREP, zmq.XREQ, zmq.PUB, 'intask', 'outtask')
            q.bind_in(self.client_info['task'][1])
            q.setsockopt_in(zmq.IDENTITY, 'task')
            q.bind_out(self.engine_info['task'])
            q.connect_mon(self.monitor_url)
            q.daemon = True
            children.append(q)
        elif self.scheme == 'none':
            self.log.warn("task::using no Task scheduler")

        else:
            self.log.info("task::using Python %s Task scheduler" % self.scheme)
            sargs = (self.client_info['task'][1], self.engine_info['task'],
                     self.monitor_url, self.client_info['notification'])
            kwargs = dict(scheme=self.scheme,
                          logname=self.log.name,
                          loglevel=self.log.level,
                          config=dict(self.config))
            q = Process(target=launch_scheduler, args=sargs, kwargs=kwargs)
            q.daemon = True
            children.append(q)
Example #7
class TaskScheduler(SessionFactory):
    """Python TaskScheduler object.

    This is the simplest object that supports msg_id based
    DAG dependencies. *Only* task msg_ids are checked, not
    msg_ids of jobs submitted via the MUX queue.

    """

    hwm = Integer(1,
                  config=True,
                  help="""specify the High Water Mark (HWM) for the downstream
        socket in the Task scheduler. This is the maximum number
        of allowed outstanding tasks on each engine.
        
        The default (1) means that only one task can be outstanding on each
        engine.  Setting TaskScheduler.hwm=0 means there is no limit, and the
        engines continue to be assigned tasks while they are working,
        effectively hiding network latency behind computation, but can result
        in an imbalance of work when submitting many heterogeneous tasks all at
        once.  Any value greater than one is a compromise between the
        two.

        """)
    scheme_name = Enum(
        ('leastload', 'pure', 'lru', 'plainrandom', 'weighted', 'twobin'),
        'leastload',
        config=True,
        allow_none=False,
        help="""select the task scheduler scheme  [default: Python LRU]
        Options are: 'pure', 'lru', 'plainrandom', 'weighted', 'twobin','leastload'"""
    )

    def _scheme_name_changed(self, old, new):
        self.log.debug("Using scheme %r" % new)
        self.scheme = globals()[new]

    # input arguments:
    scheme = Instance(FunctionType)  # function for determining the destination

    def _scheme_default(self):
        return leastload

    client_stream = Instance(zmqstream.ZMQStream)  # client-facing stream
    engine_stream = Instance(zmqstream.ZMQStream)  # engine-facing stream
    notifier_stream = Instance(zmqstream.ZMQStream)  # hub-facing sub stream
    mon_stream = Instance(zmqstream.ZMQStream)  # hub-facing pub stream

    # internals:
    graph = Dict()  # dict by msg_id of [ msg_ids that depend on key ]
    retries = Dict()  # dict by msg_id of retries remaining (non-neg ints)
    # waiting = List() # list of msg_ids ready to run, but haven't due to HWM
    depending = Dict()  # dict by msg_id of Jobs
    pending = Dict()  # dict by engine_uuid of submitted tasks
    completed = Dict()  # dict by engine_uuid of completed tasks
    failed = Dict()  # dict by engine_uuid of failed tasks
    destinations = Dict(
    )  # dict by msg_id of engine_uuids where jobs ran (reverse of completed+failed)
    clients = Dict()  # dict by msg_id for who submitted the task
    targets = List()  # list of target IDENTs
    loads = List()  # list of engine loads
    # full = Set() # set of IDENTs that have HWM outstanding tasks
    all_completed = Set()  # set of all completed tasks
    all_failed = Set()  # set of all failed tasks
    all_done = Set()  # set of all finished tasks=union(completed,failed)
    all_ids = Set()  # set of all submitted task IDs

    auditor = Instance('zmq.eventloop.ioloop.PeriodicCallback')

    ident = CBytes()  # ZMQ identity. This should just be self.session.session

    # but ensure Bytes
    def _ident_default(self):
        return self.session.bsession

    def start(self):
        self.engine_stream.on_recv(self.dispatch_result, copy=False)
        self.client_stream.on_recv(self.dispatch_submission, copy=False)

        self._notification_handlers = dict(
            registration_notification=self._register_engine,
            unregistration_notification=self._unregister_engine)
        self.notifier_stream.on_recv(self.dispatch_notification)
        self.auditor = ioloop.PeriodicCallback(self.audit_timeouts, 2e3,
                                               self.loop)  # every 2 seconds
        self.auditor.start()
        self.log.info("Scheduler started [%s]" % self.scheme_name)

    def resume_receiving(self):
        """Resume accepting jobs."""
        self.client_stream.on_recv(self.dispatch_submission, copy=False)

    def stop_receiving(self):
        """Stop accepting jobs while there are no engines.
        Leave them in the ZMQ queue."""
        self.client_stream.on_recv(None)

    #-----------------------------------------------------------------------
    # [Un]Registration Handling
    #-----------------------------------------------------------------------

    def dispatch_notification(self, msg):
        """dispatch register/unregister events."""
        try:
            idents, msg = self.session.feed_identities(msg)
        except ValueError:
            self.log.warn("task::Invalid Message: %r", msg)
            return
        try:
            msg = self.session.unserialize(msg)
        except ValueError:
            self.log.warn("task::Unauthorized message from: %r" % idents)
            return

        msg_type = msg['header']['msg_type']

        handler = self._notification_handlers.get(msg_type, None)
        if handler is None:
            self.log.error("Unhandled message type: %r" % msg_type)
        else:
            try:
                handler(asbytes(msg['content']['queue']))
            except Exception:
                self.log.error("task::Invalid notification msg: %r",
                               msg,
                               exc_info=True)

    def _register_engine(self, uid):
        """New engine with ident `uid` became available."""
        # head of the line:
        self.targets.insert(0, uid)
        self.loads.insert(0, 0)

        # initialize sets
        self.completed[uid] = set()
        self.failed[uid] = set()
        self.pending[uid] = {}

        # rescan the graph:
        self.update_graph(None)

    def _unregister_engine(self, uid):
        """Existing engine with ident `uid` became unavailable."""
        if len(self.targets) == 1:
            # this was our only engine
            pass

        # handle any potentially finished tasks:
        self.engine_stream.flush()

        # don't pop destinations, because they might be used later
        # map(self.destinations.pop, self.completed.pop(uid))
        # map(self.destinations.pop, self.failed.pop(uid))

        # prevent this engine from receiving work
        idx = self.targets.index(uid)
        self.targets.pop(idx)
        self.loads.pop(idx)

        # wait 5 seconds before cleaning up pending jobs, since the results might
        # still be incoming
        if self.pending[uid]:
            dc = ioloop.DelayedCallback(
                lambda: self.handle_stranded_tasks(uid), 5000, self.loop)
            dc.start()
        else:
            self.completed.pop(uid)
            self.failed.pop(uid)

    def handle_stranded_tasks(self, engine):
        """Deal with jobs resident in an engine that died."""
        lost = self.pending[engine]
        for msg_id in list(lost.keys()):
            if msg_id not in self.pending[engine]:
                # prevent double-handling of messages
                continue

            raw_msg = lost[msg_id][0]
            idents, msg = self.session.feed_identities(raw_msg, copy=False)
            parent = self.session.unpack(msg[1].bytes)
            idents = [engine, idents[0]]

            # build fake error reply
            try:
                raise error.EngineError(
                    "Engine %r died while running task %r" % (engine, msg_id))
            except:
                content = error.wrap_exception()
            # build fake header
            header = dict(
                status='error',
                engine=engine,
                date=datetime.now(),
            )
            msg = self.session.msg('apply_reply',
                                   content,
                                   parent=parent,
                                   subheader=header)
            raw_reply = list(map(zmq.Message,
                                 self.session.serialize(msg, ident=idents)))
            # and dispatch it
            self.dispatch_result(raw_reply)

        # finally scrub completed/failed lists
        self.completed.pop(engine)
        self.failed.pop(engine)

    #-----------------------------------------------------------------------
    # Job Submission
    #-----------------------------------------------------------------------
    def dispatch_submission(self, raw_msg):
        """Dispatch job submission to appropriate handlers."""
        # ensure targets up to date:
        self.notifier_stream.flush()
        try:
            idents, msg = self.session.feed_identities(raw_msg, copy=False)
            msg = self.session.unserialize(msg, content=False, copy=False)
        except Exception:
            self.log.error("task::Invalid task msg: %r" % raw_msg,
                           exc_info=True)
            return

        # send to monitor
        self.mon_stream.send_multipart([b'intask'] + raw_msg, copy=False)

        header = msg['header']
        msg_id = header['msg_id']
        self.all_ids.add(msg_id)

        # get targets as a set of bytes objects
        # from a list of unicode objects
        targets = header.get('targets', [])
        targets = map(asbytes, targets)
        targets = set(targets)

        retries = header.get('retries', 0)
        self.retries[msg_id] = retries

        # time dependencies
        after = header.get('after', None)
        if after:
            after = Dependency(after)
            if after.all:
                if after.success:
                    after = Dependency(
                        after.difference(self.all_completed),
                        success=after.success,
                        failure=after.failure,
                        all=after.all,
                    )
                if after.failure:
                    after = Dependency(
                        after.difference(self.all_failed),
                        success=after.success,
                        failure=after.failure,
                        all=after.all,
                    )
            if after.check(self.all_completed, self.all_failed):
                # recast as empty set, if `after` already met,
                # to prevent unnecessary set comparisons
                after = MET
        else:
            after = MET

        # location dependencies
        follow = Dependency(header.get('follow', []))

        # turn timeouts into datetime objects:
        timeout = header.get('timeout', None)
        if timeout:
            # cast to float, because jsonlib returns floats as decimal.Decimal,
            # which timedelta does not accept
            timeout = datetime.now() + timedelta(0, float(timeout), 0)

        job = Job(
            msg_id=msg_id,
            raw_msg=raw_msg,
            idents=idents,
            msg=msg,
            header=header,
            targets=targets,
            after=after,
            follow=follow,
            timeout=timeout,
        )

        # validate and reduce dependencies:
        for dep in after, follow:
            if not dep:  # empty dependency
                continue
            # check valid:
            if msg_id in dep or dep.difference(self.all_ids):
                self.depending[msg_id] = job
                return self.fail_unreachable(msg_id, error.InvalidDependency)
            # check if unreachable:
            if dep.unreachable(self.all_completed, self.all_failed):
                self.depending[msg_id] = job
                return self.fail_unreachable(msg_id)

        if after.check(self.all_completed, self.all_failed):
            # time deps already met, try to run
            if not self.maybe_run(job):
                # can't run yet
                if msg_id not in self.all_failed:
                    # could have failed as unreachable
                    self.save_unmet(job)
        else:
            self.save_unmet(job)

    def audit_timeouts(self):
        """Audit all waiting tasks for expired timeouts."""
        now = datetime.now()
        for msg_id in list(self.depending.keys()):
            # must recheck, in case one failure cascaded to another:
            if msg_id in self.depending:
                job = self.depending[msg_id]
                if job.timeout and job.timeout < now:
                    self.fail_unreachable(msg_id, error.TaskTimeout)

    def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
        """a task has become unreachable, send a reply with an ImpossibleDependency
        error."""
        if msg_id not in self.depending:
            self.log.error("msg %r already failed!", msg_id)
            return
        job = self.depending.pop(msg_id)
        for mid in job.dependents:
            if mid in self.graph:
                self.graph[mid].remove(msg_id)

        try:
            raise why()
        except:
            content = error.wrap_exception()

        self.all_done.add(msg_id)
        self.all_failed.add(msg_id)

        msg = self.session.send(self.client_stream,
                                'apply_reply',
                                content,
                                parent=job.header,
                                ident=job.idents)
        self.session.send(self.mon_stream,
                          msg,
                          ident=[b'outtask'] + job.idents)

        self.update_graph(msg_id, success=False)

    def maybe_run(self, job):
        """check location dependencies, and run if they are met."""
        msg_id = job.msg_id
        self.log.debug("Attempting to assign task %s", msg_id)
        if not self.targets:
            # no engines, definitely can't run
            return False

        if job.follow or job.targets or job.blacklist or self.hwm:
            # we need a can_run filter
            def can_run(idx):
                # check hwm
                if self.hwm and self.loads[idx] == self.hwm:
                    return False
                target = self.targets[idx]
                # check blacklist
                if target in job.blacklist:
                    return False
                # check targets
                if job.targets and target not in job.targets:
                    return False
                # check follow
                return job.follow.check(self.completed[target],
                                        self.failed[target])

            indices = list(filter(can_run, range(len(self.targets))))

            if not indices:
                # couldn't run
                if job.follow.all:
                    # check follow for impossibility
                    dests = set()
                    relevant = set()
                    if job.follow.success:
                        relevant = self.all_completed
                    if job.follow.failure:
                        relevant = relevant.union(self.all_failed)
                    for m in job.follow.intersection(relevant):
                        dests.add(self.destinations[m])
                    if len(dests) > 1:
                        self.depending[msg_id] = job
                        self.fail_unreachable(msg_id)
                        return False
                if job.targets:
                    # check blacklist+targets for impossibility
                    job.targets.difference_update(job.blacklist)
                    if not job.targets or not job.targets.intersection(
                            self.targets):
                        self.depending[msg_id] = job
                        self.fail_unreachable(msg_id)
                        return False
                return False
        else:
            indices = None

        self.submit_task(job, indices)
        return True

    def save_unmet(self, job):
        """Save a message for later submission when its dependencies are met."""
        msg_id = job.msg_id
        self.depending[msg_id] = job
        # track the ids in follow or after, but not those already finished
        for dep_id in job.after.union(job.follow).difference(self.all_done):
            if dep_id not in self.graph:
                self.graph[dep_id] = set()
            self.graph[dep_id].add(msg_id)
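
    # Structure sketch (hedged): after save_unmet() for a job C that depends
    # on unfinished tasks A and B, the graph maps each dependency to its
    # dependents, e.g. {A: {C}, B: {C}}, while depending[C] holds the Job;
    # A, B and C stand in for real msg_ids.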

    def submit_task(self, job, indices=None):
        """Submit a task to any of a subset of our targets."""
        if indices:
            loads = [self.loads[i] for i in indices]
        else:
            loads = self.loads
        idx = self.scheme(loads)
        if indices:
            idx = indices[idx]
        target = self.targets[idx]
        # print (target, map(str, msg[:3]))
        # send job to the engine
        self.engine_stream.send(target, flags=zmq.SNDMORE, copy=False)
        self.engine_stream.send_multipart(job.raw_msg, copy=False)
        # update load
        self.add_job(idx)
        self.pending[target][job.msg_id] = job
        # notify Hub
        content = dict(msg_id=job.msg_id, engine_id=target.decode('ascii'))
        self.session.send(self.mon_stream,
                          'task_destination',
                          content=content,
                          ident=[b'tracktask', self.ident])

    #-----------------------------------------------------------------------
    # Result Handling
    #-----------------------------------------------------------------------
    def dispatch_result(self, raw_msg):
        """dispatch method for result replies"""
        try:
            idents, msg = self.session.feed_identities(raw_msg, copy=False)
            msg = self.session.unserialize(msg, content=False, copy=False)
            engine = idents[0]
            try:
                idx = self.targets.index(engine)
            except ValueError:
                pass  # skip load-update for dead engines
            else:
                self.finish_job(idx)
        except Exception:
            self.log.error("task::Invalid result: %r", raw_msg, exc_info=True)
            return

        header = msg['header']
        parent = msg['parent_header']
        if header.get('dependencies_met', True):
            success = (header['status'] == 'ok')
            msg_id = parent['msg_id']
            retries = self.retries[msg_id]
            if not success and retries > 0:
                # failed
                self.retries[msg_id] = retries - 1
                self.handle_unmet_dependency(idents, parent)
            else:
                del self.retries[msg_id]
                # relay to client and update graph
                self.handle_result(idents, parent, raw_msg, success)
                # send to Hub monitor
                self.mon_stream.send_multipart([b'outtask'] + raw_msg,
                                               copy=False)
        else:
            self.handle_unmet_dependency(idents, parent)

    def handle_result(self, idents, parent, raw_msg, success=True):
        """handle a real task result, either success or failure"""
        # first, relay result to client
        engine = idents[0]
        client = idents[1]
        # swap_ids for XREP-XREP mirror
        raw_msg[:2] = [client, engine]
        # print (map(str, raw_msg[:4]))
        self.client_stream.send_multipart(raw_msg, copy=False)
        # now, update our data structures
        msg_id = parent['msg_id']
        self.pending[engine].pop(msg_id)
        if success:
            self.completed[engine].add(msg_id)
            self.all_completed.add(msg_id)
        else:
            self.failed[engine].add(msg_id)
            self.all_failed.add(msg_id)
        self.all_done.add(msg_id)
        self.destinations[msg_id] = engine

        self.update_graph(msg_id, success)

    def handle_unmet_dependency(self, idents, parent):
        """handle an unmet dependency"""
        engine = idents[0]
        msg_id = parent['msg_id']

        job = self.pending[engine].pop(msg_id)
        job.blacklist.add(engine)

        if job.blacklist == job.targets:
            self.depending[msg_id] = job
            self.fail_unreachable(msg_id)
        elif not self.maybe_run(job):
            # resubmit failed
            if msg_id not in self.all_failed:
                # put it back in our dependency tree
                self.save_unmet(job)

        if self.hwm:
            try:
                idx = self.targets.index(engine)
            except ValueError:
                pass  # skip load-update for dead engines
            else:
                if self.loads[idx] == self.hwm - 1:
                    self.update_graph(None)

    def update_graph(self, dep_id=None, success=True):
        """dep_id just finished. Update our dependency
        graph and submit any jobs that just became runnable.

        Called with dep_id=None to update entire graph for hwm, but without finishing
        a task.
        """
        # print ("\n\n***********")
        # pprint (dep_id)
        # pprint (self.graph)
        # pprint (self.depending)
        # pprint (self.all_completed)
        # pprint (self.all_failed)
        # print ("\n\n***********\n\n")
        # update any jobs that depended on the dependency
        jobs = self.graph.pop(dep_id, [])

        # recheck *all* jobs if
        # a) we have HWM and an engine just became no longer full
        # or b) dep_id was given as None

        if dep_id is None or self.hwm and any(
            [load == self.hwm - 1 for load in self.loads]):
            jobs = self.depending.keys()

        for msg_id in sorted(
                jobs, key=lambda msg_id: self.depending[msg_id].timestamp):
            job = self.depending[msg_id]

            if job.after.unreachable(self.all_completed, self.all_failed)\
                    or job.follow.unreachable(self.all_completed, self.all_failed):
                self.fail_unreachable(msg_id)

            elif job.after.check(self.all_completed,
                                 self.all_failed):  # time deps met, maybe run
                if self.maybe_run(job):

                    self.depending.pop(msg_id)
                    for mid in job.dependents:
                        if mid in self.graph:
                            self.graph[mid].remove(msg_id)

    #----------------------------------------------------------------------
    # methods to be overridden by subclasses
    #----------------------------------------------------------------------

    def add_job(self, idx):
        """Called after self.targets[idx] just got the job with header.
        Override with subclasses.  The default ordering is simple LRU.
        The default loads are the number of outstanding jobs."""
        self.loads[idx] += 1
        for lis in (self.targets, self.loads):
            lis.append(lis.pop(idx))
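
    # Rotation sketch (hedged): with targets [e0, e1, e2] and loads
    # [0, 1, 0], add_job(0) bumps e0's load and rotates both lists to
    # targets [e1, e2, e0], loads [1, 0, 1], keeping the least recently
    # assigned engine at the front.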

    def finish_job(self, idx):
        """Called after self.targets[idx] just finished a job.
        Override with subclasses."""
        self.loads[idx] -= 1
Example #8
class HubAuth(BaseAuth):
    """Jupyter hub authenticator."""

    graders = List([],
                   config=True,
                   help="List of JupyterHub user names allowed to grade.")

    proxy_address = Unicode(
        config=True, help="Address of the configurable-http-proxy server.")

    def _proxy_address_default(self):
        return self._ip

    proxy_port = Int(8001,
                     config=True,
                     help="Port of the configurable-http-proxy server.")

    hub_address = Unicode(config=True, help="Address of the hub server.")

    def _hub_address_default(self):
        return self._ip

    hub_port = Int(8000, config=True, help="Port of the hub server.")

    hubapi_address = Unicode(config=True, help="Address of the hubapi server.")

    def _hubapi_address_default(self):
        return self._ip

    hubapi_port = Int(8081, config=True, help="Port of the hubapi server.")
    hubapi_cookie = Unicode("jupyter-hub-token",
                            config=True,
                            help="Name of the cookie used by JupyterHub")

    notebook_url_prefix = Unicode(None,
                                  config=True,
                                  allow_none=True,
                                  help="""
        Relative path of the formgrader with respect to the hub's user base
        directory, with no trailing slash, e.g. "Documents" or "Documents/notebooks". """
                                  )

    def _notebook_url_prefix_changed(self, name, old, new):
        self.notebook_url_prefix = new.strip('/')

    hub_base_url = Unicode(config=True, help="Base URL of the hub server.")

    def _hub_base_url_default(self):
        return 'http://{}:{}'.format(self.hub_address, self.hub_port)

    generate_hubapi_token = Bool(False,
                                 config=True,
                                 help="""Use `jupyterhub token` as a default
        for HubAuth.hubapi_token instead of $JPY_API_TOKEN.""")

    hub_db = Unicode(config=True,
                     help="""Path to JupyterHub's database.  Only
        mandatory if `generate_hubapi_token` is True.""")

    hubapi_token = Unicode(config=True,
                           help="""JupyterHub API auth token.  
        Generated by running `jupyterhub token`.  If not explicitly set,
        nbgrader will use $JPY_API_TOKEN as the API token.""")

    def _hubapi_token_default(self):
        if self.generate_hubapi_token:
            return check_output(
                ['jupyterhub', 'token',
                 '--db={}'.format(self.hub_db)]).decode('utf-8').strip()
        else:
            return os.environ.get('JPY_API_TOKEN', '')

    proxy_token = Unicode(config=True,
                          help="""JupyterHub configurable proxy 
        auth token.  If not explicitly set, nbgrader will use 
        $CONFIGPROXY_AUTH_TOKEN as the API token.""")

    def _proxy_token_default(self):
        return os.environ.get('CONFIGPROXY_AUTH_TOKEN', '')

    remap_url = Unicode(config=True,
                        help="""Suffix appened to 
        `HubAuth.hub_base_url` to form the full URL to the formgrade server.  By
        default this is '/hub/nbgrader/{{NbGraderConfig.course_id}}'.  Change this if you
        plan on running more than one formgrade server behind one JupyterHub
        instance.""")

    def _remap_url_default(self):
        return '/hub/nbgrader/' + self.parent.course_id

    def _remap_url_changed(self, name, old, new):
        self.remap_url = new.rstrip('/')

    def __init__(self, *args, **kwargs):
        super(HubAuth, self).__init__(*args, **kwargs)

        # Create base URLs for the hub and proxy.
        self._hubapi_base_url = 'http://{}:{}'.format(self.hubapi_address,
                                                      self.hubapi_port)
        self._proxy_base_url = 'http://{}:{}'.format(self.proxy_address,
                                                     self.proxy_port)

        # Register self as a route of the configurable-http-proxy and then
        # update the base_url to point to the new path.
        response = self._proxy_request(
            '/api/routes' + self.remap_url,
            method='POST',
            body={'target': 'http://{}:{}'.format(self._ip, self._port)})
        if response.status_code != 201:
            raise Exception(
                'Error while trying to add JupyterHub route. {}: {}'.format(
                    response.status_code, response.text))
        self._base_url = self.hub_base_url + self.remap_url

        # Redirect all formgrade requests to the correct API method.
        self._app.register_blueprint(blueprint,
                                     static_url_path=self.remap_url +
                                     '/static',
                                     url_prefix=self.remap_url,
                                     url_defaults={'name': 'hub'})

    def authenticate(self):
        """Authenticate a request.
        Returns a boolean or flask redirect."""

        # If the auth cookie doesn't exist, redirect to the login page with
        # next set to redirect back to this page.
        if self.hubapi_cookie not in request.cookies:
            return redirect(self.hub_base_url + '/hub/login?next=' +
                            self.remap_url)
        cookie = request.cookies[self.hubapi_cookie]

        # Check with the Hub to see if the auth cookie is valid.
        response = self._hubapi_request('/hub/api/authorizations/cookie/' +
                                        self.hubapi_cookie + '/' + cookie)
        if response.status_code == 200:

            # Auth information received.
            data = response.json()
            if 'name' in data:
                user = data['name']

                # Check if the user name is registered as a grader.
                if user in self.graders:
                    self._user = user
                    return True
                else:
                    self.log.warn(
                        'Unauthorized user "%s" attempted to access the formgrader.'
                        % user)
            else:
                self.log.warn(
                    'Malformed response from the JupyterHub auth API.')
                abort(
                    500,
                    "Failed to check authorization, malformed response from Hub auth."
                )
        elif response.status_code == 403:
            self.log.error(
                "I don't have permission to verify cookies, my auth token may have expired: [%i] %s",
                response.status_code, response.reason)
            abort(
                500,
                "Permission failure checking authorization, I may need to be restarted"
            )
        elif response.status_code >= 500:
            self.log.error("Upstream failure verifying auth token: [%i] %s",
                           response.status_code, response.reason)
            abort(502, "Failed to check authorization (upstream problem)")
        elif response.status_code >= 400:
            self.log.warn("Failed to check authorization: [%i] %s",
                          response.status_code, response.reason)
            abort(500, "Failed to check authorization")
        else:
            # Auth invalid, reauthenticate.
            return redirect(self.hub_base_url + '/hub/login?next=' +
                            self.remap_url)
        return False

    def notebook_server_exists(self):
        """Does the notebook server exist?"""
        return True

    def get_notebook_url(self, relative_path):
        """Gets the notebook's url."""
        if self.notebook_url_prefix is not None:
            relative_path = self.notebook_url_prefix + '/' + relative_path
        return self.hub_base_url + "/user/{}/notebooks/{}".format(
            self._user, relative_path)

    def _hubapi_request(self, *args, **kwargs):
        return self._request('hubapi', *args, **kwargs)

    def _proxy_request(self, *args, **kwargs):
        return self._request('proxy', *args, **kwargs)

    def _request(self, service, relative_path, method='GET', body=None):
        base_url = getattr(self, '_%s_base_url' % service)
        token = getattr(self, '%s_token' % service)

        data = body
        if isinstance(data, (dict, )):
            data = json.dumps(data)

        return requests.request(method,
                                base_url + relative_path,
                                headers={'Authorization': 'token %s' % token},
                                data=data)
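

# Configuration sketch (hedged): HubAuth is normally driven from an nbgrader
# config file; the grader names and tokens below are placeholders.
#
#     c = get_config()
#     c.HubAuth.graders = ['instructor1', 'instructor2']
#     c.HubAuth.hubapi_token = 'abc123'    # or export JPY_API_TOKEN
#     c.HubAuth.proxy_token = 'def456'     # or export CONFIGPROXY_AUTH_TOKEN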