Example #1
    def __init__(self, shell, data=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = Log("SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        try:
            should_serialize = conf.serialize()
            if should_serialize:
                self.logger.debug("Serialization enabled.")

                self.magics_home_path = get_magics_home_path()
                path_to_serialize = join_paths(self.magics_home_path,
                                               "state.json")

                self.logger.debug(
                    "Will serialize to {}.".format(path_to_serialize))

                self.spark_controller = SparkController(
                    self.ipython_display, serialize_path=path_to_serialize)
            else:
                self.logger.debug("Serialization NOT enabled.")
        except KeyError:
            self.logger.error("Could not read env vars for serialization.")

        self.logger.debug("Initialized spark magics.")
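The branch taken above depends entirely on conf.serialize(), which is read inside a try/except KeyError. A hedged sketch of exercising the non-serializing branch in a test; the patch target, the mock shell, and the mock import are assumptions, not code from this project:

from mock import MagicMock, patch  # the tests below appear to use this library

# Hypothetical test: force serialize() to return False and check that the
# default SparkController (no serialize_path) is kept.
with patch.object(conf, "serialize", return_value=False):
    magics = SparkMagicBase(MagicMock())
assert magics.spark_controller is not None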
Example #2
def test_stderr_flush():
    ipython_shell = MagicMock()
    ipython_display = IpythonDisplay()
    ipython_display._ipython_shell = ipython_shell
    sys.stderr = MagicMock()

    ipython_display.send_error('Testing Stderr Flush')
    assert sys.stderr.flush.call_count == 1
Example #3
def test_stdout_flush():
    ipython_shell = MagicMock()
    ipython_display = IpythonDisplay()
    ipython_display._ipython_shell = ipython_shell
    sys.stdout = MagicMock()

    ipython_display.write(u'Testing Stdout Flush è')
    assert sys.stdout.flush.call_count == 1
Example #4
def test_stderr_flush():
    ipython_shell = MagicMock()
    ipython_display = IpythonDisplay()
    ipython_display._ipython_shell = ipython_shell
    sys.stderr = MagicMock()

    ipython_display.send_error(u'Testing Stderr Flush è')
    assert sys.stderr.flush.call_count == 1
Example #5
def test_stdout_flush():
    ipython_shell = MagicMock()
    ipython_display = IpythonDisplay()
    ipython_display._ipython_shell = ipython_shell
    sys.stdout = MagicMock()

    ipython_display.write('Testing Stdout Flush')
    assert sys.stdout.flush.call_count == 1
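These flush tests (Examples #2 through #5) only pass if IpythonDisplay flushes the corresponding stream after every write. The actual implementation is not shown on this page; below is a minimal hypothetical sketch of the behavior the tests assume:

import sys

class IpythonDisplay(object):
    # Hypothetical minimal behavior consistent with the flush tests above:
    # write() targets stdout, send_error() targets stderr, and both flush
    # immediately so flush.call_count == 1 holds after a single call.
    def write(self, msg):
        sys.stdout.write(msg)
        sys.stdout.flush()

    def send_error(self, error):
        sys.stderr.write(error)
        sys.stderr.flush()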
Example #6
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = Log("SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        try:
            should_serialize = conf.serialize()
            if should_serialize:
                self.logger.debug("Serialization enabled.")

                self.magics_home_path = get_magics_home_path()
                path_to_serialize = join_paths(self.magics_home_path,
                                               "state.json")

                self.logger.debug(
                    "Will serialize to {}.".format(path_to_serialize))

                self.spark_controller = SparkController(
                    self.ipython_display, serialize_path=path_to_serialize)
            else:
                self.logger.debug("Serialization NOT enabled.")
        except KeyError:
            self.logger.error("Could not read env vars for serialization.")

        self.logger.debug("Initialized spark magics.")

    def execute_sqlquery(self, sqlquery, session, output_var, quiet):
        try:
            df = self.spark_controller.run_cell_sql(sqlquery, session)
            if output_var is not None:
                self.shell.user_ns[output_var] = df
            if quiet:
                return None
            else:
                return df
        except DataFrameParseException as e:
            self.ipython_display.send_error(e.out)
            return None

    @staticmethod
    def print_endpoint_info(info_sessions):
        sessions_info = ["        {}".format(i) for i in info_sessions]
        print("""Info for endpoint:
    Sessions:
{}
""".format("\n".join(sessions_info)))
Example #7
class DataGraph(object):
    """This does not use the table version of plotly because it freezes up the browser for >60 rows. Instead, we use
    pandas df HTML representation."""
    def __init__(self, display=None):
        if display is None:
            self.display = IpythonDisplay()
        else:
            self.display = display

    def render(self, df, encoding, output):
        with output:
            max_rows = pd.get_option("display.max_rows")
            max_cols = pd.get_option("display.max_columns")
            show_dimensions = pd.get_option("display.show_dimensions")

            # This will hide the index column for pandas df.
            self.display.html("""
<style>
    table.dataframe.hideme thead th:first-child {
        display: none;
    }
    table.dataframe.hideme tbody th {
        display: none;
    }
</style>
""")
            self.display.html(
                df.to_html(max_rows=max_rows,
                           max_cols=max_cols,
                           show_dimensions=show_dimensions,
                           notebook=True,
                           classes="hideme"))

    @staticmethod
    def display_logarithmic_x_axis():
        return False

    @staticmethod
    def display_logarithmic_y_axis():
        return False

    @staticmethod
    def display_x():
        return False

    @staticmethod
    def display_y():
        return False
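A hypothetical way to drive DataGraph.render. The encoding argument is accepted but unused by this renderer, and output is assumed to be an ipywidgets output area that supports the context-manager protocol used by `with output:`:

import pandas as pd
from ipywidgets import Output  # assumed dependency; usage is illustrative

df = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})
out = Output()
DataGraph().render(df, encoding=None, output=out)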
Example #8
    def __init__(self, reader_writer):
        assert reader_writer is not None

        self.logger = Log("ClientManagerStateSerializer")
        self._ipython_display = IpythonDisplay()

        self._reader_writer = reader_writer
Example #9
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = Log("{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()
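A hypothetical concrete kernel built on this base. Every name below (class name, version strings, language_info contents) is illustrative rather than taken from the project:

class PySparkKernelSketch(SparkKernelBase):
    # Hypothetical subclass: supplies the Jupyter-required fields and the
    # Livy session language; remaining kwargs flow through to IPythonKernel.
    def __init__(self, **kwargs):
        super(PySparkKernelSketch, self).__init__(
            implementation="PySparkSketch",
            implementation_version="0.1",
            language="python",
            language_version="2.7",
            language_info={"name": "pyspark", "mimetype": "text/x-python"},
            session_language="python",
            **kwargs)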
Example #10
    def __init__(self,
                 df,
                 encoding,
                 renderer=None,
                 ipywidget_factory=None,
                 encoding_widget=None,
                 ipython_display=None,
                 nested_widget_mode=False,
                 testing=False,
                 **kwargs):
        assert encoding is not None
        assert df is not None
        assert type(df) is pd.DataFrame
        assert len(df.columns) > 0

        kwargs['orientation'] = 'vertical'

        if not testing:
            super(AutoVizWidget, self).__init__((), **kwargs)

        if renderer is None:
            renderer = GraphRenderer()
        self.renderer = renderer

        if ipywidget_factory is None:
            ipywidget_factory = IpyWidgetFactory()
        self.ipywidget_factory = ipywidget_factory

        if encoding_widget is None:
            encoding_widget = EncodingWidget(df, encoding, self.on_render_viz)
        self.encoding_widget = encoding_widget

        if ipython_display is None:
            ipython_display = IpythonDisplay()
        self.ipython_display = ipython_display

        self.df = df

        self.encoding = encoding

        # Widget that will become the only child of AutoVizWidget
        self.widget = self.ipywidget_factory.get_vbox()

        # Create output area
        self.to_display = self.ipywidget_factory.get_output()
        self.to_display.width = "800px"
        self.output = self.ipywidget_factory.get_hbox()
        self.output.children = [self.to_display]

        self.controls = self._create_controls_widget()

        if nested_widget_mode:
            self.widget.children = [self.controls, self.output]
            self.children = [self.widget]
        else:
            self.ipython_display.display(self.controls)
            self.ipython_display.display(self.to_display)

        self.on_render_viz()
Example #11
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = Log("SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()
Example #12
    def __init__(self, spark_controller, ipywidget_factory=None, ipython_display=None,
                 nested_widget_mode=False, testing=False, **kwargs):
        kwargs['orientation'] = 'vertical'

        if not testing:
            super(AbstractMenuWidget, self).__init__((), **kwargs)

        self.spark_controller = spark_controller

        if ipywidget_factory is None:
            ipywidget_factory = IpyWidgetFactory()
        self.ipywidget_factory = ipywidget_factory

        if ipython_display is None:
            ipython_display = IpythonDisplay()
        self.ipython_display = ipython_display

        self.children = []

        if not nested_widget_mode:
            self._repr_html_()
Example #13
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 kernel_conf_name, session_language, client_name, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.kernel_conf_name = kernel_conf_name
        self.session_language = session_language
        self.client_name = client_name

        super(SparkKernelBase, self).__init__(**kwargs)

        self._logger = Log(self.client_name)
        self._session_started = False
        self._fatal_error = None
        self._ipython_display = IpythonDisplay()

        self.user_command_parser = UserCommandParser()

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            configuration = self._get_configuration()
            if not configuration:
                # _get_configuration() sets the error for us so we can just return now.
                # The kernel is not in a good state and all do_execute calls will
                # fail with the fatal error.
                return
            (username, password, url) = configuration
            self.connection_string = get_connection_string(url, username, password)
            self._load_magics_extension()
            if conf.use_auto_viz():
                self._register_auto_viz()
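This constructor's _get_configuration call (its body appears in the final example on this page) reads a conf entry named kernel_<kernel_conf_name>_credentials. A hypothetical value shaped the way that code consumes it:

# Hypothetical: what conf.kernel_python_credentials() would need to return
# for a kernel whose kernel_conf_name is "python"; all values are placeholders.
credentials = {
    "username": "u",
    "password": "p",
    "url": "https://sparkcluster.net/livy",  # the URL must be non-empty
}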
Example #14
    def __init__(self, display=None):
        if display is None:
            self.display = IpythonDisplay()
        else:
            self.display = display
Example #15
class SparkKernelBase(IPythonKernel):
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = Log("{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        def f(self):
            if self._fatal_error is not None:
                return self._repeat_fatal_error()

            return self._do_execute(code, silent, store_history, user_expressions, allow_stdin)
        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        # Cleanup
        self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions, allow_stdin):
        code_to_run = self.user_code_parser.get_code_to_run(code)

        res = self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

        return res

    def _load_magics_extension(self):
        register_magics_code = "%load_ext remotespark.kernels"
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _change_language(self):
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to change language to {}.".format(self.session_language))
        self.logger.debug("Changed language.")

    def _register_auto_viz(self):
        register_auto_viz_code = """from remotespark.datawidgets.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(register_auto_viz_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        reply_content = self._execute_cell_for_user(code, silent, store_history, user_expressions, allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        return super(SparkKernelBase, self).do_execute(code, silent, store_history, user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect. Call this and return the value it
        returns when there's some sort of error preventing the user's cell from executing; this
        will register the cell from the Jupyter UI as being completed."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()
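do_execute above delegates to wrap_unexpected_exceptions, whose implementation is not shown on this page. Given how it is called, a hedged sketch of what such a wrapper plausibly does:

def wrap_unexpected_exceptions(f, execute_if_error):
    # Hypothetical sketch: run f; on any unexpected exception, report it via
    # the kernel's display and fall back to execute_if_error (here,
    # _complete_cell) so the Jupyter UI still sees the cell as completed.
    def wrapped(self, *args, **kwargs):
        try:
            return f(self, *args, **kwargs)
        except Exception as e:
            self.ipython_display.send_error("Unexpected error:\n{}".format(e))
            return execute_if_error()
    return wrapped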
Example #16
class RemoteSparkMagics(Magics):
    def __init__(self, shell, data=None):
        # You must call the parent constructor
        super(RemoteSparkMagics, self).__init__(shell)

        self.logger = Log("RemoteSparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        try:
            should_serialize = conf.serialize()
            if should_serialize:
                self.logger.debug("Serialization enabled.")

                self.magics_home_path = get_magics_home_path()
                path_to_serialize = join_paths(self.magics_home_path, "state.json")

                self.logger.debug("Will serialize to {}.".format(path_to_serialize))

                self.spark_controller = SparkController(self.ipython_display, serialize_path=path_to_serialize)
            else:
                self.logger.debug("Serialization NOT enabled.")
        except KeyError:
            self.logger.error("Could not read env vars for serialization.")

        self.logger.debug("Initialized spark magics.")

    @magic_arguments()
    @argument("-c", "--context", type=str, default=Constants.context_name_spark,
              help="Context to use: '{}' for spark, '{}' for sql queries, and '{}' for hive queries. "
                   "Default is '{}'.".format(Constants.context_name_spark,
                                             Constants.context_name_sql,
                                             Constants.context_name_hive,
                                             Constants.context_name_spark))
    @argument("-s", "--session", help="The name of the Livy session to use. "
                                      "If only one session has been created, there's no need to specify one.")
    @argument("-o", "--output", type=str, default=None, help="If present, output when using SQL or Hive "
                                                             "query will be stored in variable of this name.")
    @argument("command", type=str, default=[""], nargs="*", help="Commands to execute.")
    @needs_local_scope
    @line_cell_magic
    def spark(self, line, cell="", local_ns=None):
        """Magic to execute spark remotely.

           This magic allows you to create a Livy Scala or Python session against a Livy endpoint. Every session can
           be used to execute either Spark code or SparkSQL code by executing against the SQL context in the session.
           When the SQL context is used, the result will be a Pandas dataframe of a sample of the results.

           If invoked with no subcommand, the cell will be executed against the specified session.

           Subcommands
           -----------
           info
               Display the available Livy sessions and other configurations for sessions.
           add
               Add a Livy session. First argument is the name of the session, second argument
               is the language, and third argument is the connection string of the Livy endpoint.
               A fourth argument specifying if session creation can be skipped if it already exists is optional:
               "skip" or empty.
               e.g. `%%spark add test python url=https://sparkcluster.net/livy;username=u;password=p skip`
               or
               e.g. `%%spark add test python url=https://sparkcluster.net/livy;username=u;password=p`
           config
               Override the livy session properties sent to Livy on session creation. All session creations will
               contain these config settings from then on.
               Expected value is a JSON key-value string to be sent as part of the Request Body for the POST /sessions
               endpoint in Livy.
               e.g. `%%spark config {"driverMemory":"1000M", "executorCores":4}`
           run
               Run Spark code against a session.
               e.g. `%%spark -s testsession` will execute the cell code against the testsession previously created
               e.g. `%%spark -s testsession -c sql` will execute the SQL code against the testsession previously created
               e.g. `%%spark -s testsession -c sql -o my_var` will execute the SQL code against the testsession
                        previously created and store the pandas dataframe created in the my_var variable in the
                        Python environment.
           logs
               Returns the logs for a given session.
               e.g. `%%spark logs -s testsession` will return the logs for the testsession previously created
           delete
               Delete a Livy session. Argument is the name of the session to be deleted.
               e.g. `%%spark delete defaultlivy`
           cleanup
               Delete all Livy sessions created by the notebook. No arguments required.
               e.g. `%%spark cleanup`
        """
        usage = "Please look at usage of %spark by executing `%spark?`."
        user_input = line
        args = parse_argstring(self.spark, user_input)

        subcommand = args.command[0].lower()

        try:
            # info
            if subcommand == "info":
                if len(args.command) == 2:
                    connection_string = args.command[1]
                    info_sessions = self.spark_controller.get_all_sessions_endpoint_info(connection_string)
                    self._print_endpoint_info(info_sessions)
                elif len(args.command) == 1:
                    self._print_local_info()
                else:
                    raise ValueError("Subcommand 'info' requires no value or a connection string to show all sessions.\n"
                                     "{}".format(usage))
            # config
            elif subcommand == "config":
                # Would normally do " ".join(args.command[1:]) but parse_argstring removes quotes...
                rest_of_line = user_input[7:]
                conf.override(conf.session_configs.__name__, json.loads(rest_of_line))
            # add
            elif subcommand == "add":
                if len(args.command) != 4 and len(args.command) != 5:
                    raise ValueError("Subcommand 'add' requires three or four arguments.\n{}".format(usage))

                name = args.command[1].lower()
                language = args.command[2].lower()
                connection_string = args.command[3]

                if len(args.command) == 5:
                    skip = args.command[4].lower() == "skip"
                else:
                    skip = False

                properties = copy.deepcopy(conf.session_configs())
                properties["kind"] = self._get_livy_kind(language)

                self.spark_controller.add_session(name, connection_string, skip, properties)
            # delete
            elif subcommand == "delete":
                if len(args.command) == 2:
                    name = args.command[1].lower()
                    self.spark_controller.delete_session_by_name(name)
                elif len(args.command) == 3:
                    connection_string = args.command[1]
                    session_id = args.command[2]
                    self.spark_controller.delete_session_by_id(connection_string, session_id)
                else:
                    raise ValueError("Subcommand 'delete' requires a session name or a connection string and id.\n{}"
                                     .format(usage))
            # cleanup
            elif subcommand == "cleanup":
                if len(args.command) == 2:
                    connection_string = args.command[1]
                    self.spark_controller.cleanup_endpoint(connection_string)
                elif len(args.command) == 1:
                    self.spark_controller.cleanup()
                else:
                    raise ValueError("Subcommand 'cleanup' requires no further values or a connection string to clean up "
                                     "sessions.\n{}".format(usage))
            # logs
            elif subcommand == "logs":
                if len(args.command) == 1:
                    (success, out) = self.spark_controller.get_logs(args.session)
                    if success:
                        self.ipython_display.write(out)
                    else:
                        self.ipython_display.send_error(out)
                else:
                    raise ValueError("Subcommand 'logs' requires no further values.\n{}".format(usage))
            # run
            elif len(subcommand) == 0:
                if args.context == Constants.context_name_spark:
                    (success, out) = self.spark_controller.run_cell(cell, args.session)
                    if success:
                        self.ipython_display.write(out)
                    else:
                        self.ipython_display.send_error(out)
                elif args.context == Constants.context_name_sql:
                    return self._execute_against_context_that_returns_df(self.spark_controller.run_cell_sql, cell,
                                                                         args.session, args.output)
                elif args.context == Constants.context_name_hive:
                    return self._execute_against_context_that_returns_df(self.spark_controller.run_cell_hive, cell,
                                                                         args.session, args.output)
                else:
                    raise ValueError("Context '{}' not found".format(args.context))
            # error
            else:
                raise ValueError("Subcommand '{}' not found. {}".format(subcommand, usage))
        except ValueError as err:
            self.ipython_display.send_error("{}".format(err))

    def _execute_against_context_that_returns_df(self, method, cell, session, output_var):
        try:
            df = method(cell, session)
            if output_var is not None:
                self.shell.user_ns[output_var] = df
            return df
        except DataFrameParseException as e:
            self.ipython_display.send_error(e.out)
            return None

    def _print_local_info(self):
        sessions_info = ["        {}".format(i) for i in self.spark_controller.get_manager_sessions_str()]
        print("""Info for running Spark:
    Sessions:
{}
    Session configs:
        {}
""".format("\n".join(sessions_info), conf.session_configs()))


    def _print_endpoint_info(self, info_sessions):
        sessions_info = ["        {}".format(i) for i in info_sessions]
        print("""Info for endpoint:
    Sessions:
{}
""".format("\n".join(sessions_info)))

    @staticmethod
    def _get_livy_kind(language):
        if language == Constants.lang_scala:
            return Constants.session_kind_spark
        elif language == Constants.lang_python:
            return Constants.session_kind_pyspark
        elif language == Constants.lang_r:
            return Constants.session_kind_sparkr
        else:
            raise ValueError("Cannot get session kind for {}.".format(language))
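Pulling the docstring's subcommands together, a typical notebook run of this magic might look like the cells below; the endpoint, credentials, session name, and table are placeholders taken from the docstring's own examples:

%spark add test python url=https://sparkcluster.net/livy;username=u;password=p skip
%spark info

%%spark -s test -c sql -o my_var
SELECT * FROM my_table

After the last cell, my_var holds the sampled pandas dataframe in the local Python environment.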
Example #17
class SparkKernelBase(IPythonKernel):
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 kernel_conf_name, session_language, client_name, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.kernel_conf_name = kernel_conf_name
        self.session_language = session_language
        self.client_name = client_name

        super(SparkKernelBase, self).__init__(**kwargs)

        self._logger = Log(self.client_name)
        self._session_started = False
        self._fatal_error = None
        self._ipython_display = IpythonDisplay()

        self.user_command_parser = UserCommandParser()

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            configuration = self._get_configuration()
            if not configuration:
                # _get_configuration() sets the error for us so we can just return now.
                # The kernel is not in a good state and all do_execute calls will
                # fail with the fatal error.
                return
            (username, password, url) = configuration
            self.connection_string = get_connection_string(url, username, password)
            self._load_magics_extension()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        if self._fatal_error is not None:
            self._repeat_fatal_error()

        # Parse command
        subcommand, force, output_var, command = self.user_command_parser.parse_user_command(code)

        # Get transformer
        transformer = self._get_code_transformer(subcommand)

        # Get instructions
        try:
            code_to_run, error_to_show, begin_action, end_action, deletes_session = \
                transformer.get_code_to_execute(self._session_started, self.connection_string,
                                                force, output_var, command)
        except SyntaxError as se:
            self._show_user_error("{}".format(se))
        else:
            # Execute instructions
            if error_to_show is not None:
                self._show_user_error(error_to_show)
                return self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

            if begin_action == Constants.delete_session_action:
                self._delete_session()
            elif begin_action == Constants.start_session_action:
                self._start_session()
            elif begin_action == Constants.do_nothing_action:
                pass
            else:
                raise ValueError("Begin action {} not supported.".format(begin_action))

            res = self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

            if end_action == Constants.delete_session_action:
                self._delete_session()
            elif end_action == Constants.start_session_action:
                self._start_session()
            elif end_action == Constants.do_nothing_action:
                pass
            else:
                raise ValueError("End action {} not supported.".format(end_action))

            if deletes_session:
                self._session_started = False

            return res

        return self._execute_cell("", silent, store_history, user_expressions, allow_stdin)

    def do_shutdown(self, restart):
        # Cleanup
        self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    @staticmethod
    def _get_code_transformer(subcommand):
        if subcommand == UserCommandParser.run_command:
            return SparkTransformer(subcommand)
        elif subcommand == UserCommandParser.sql_command:
            return SqlTransformer(subcommand)
        elif subcommand == UserCommandParser.hive_command:
            return HiveTransformer(subcommand)
        elif subcommand == UserCommandParser.config_command:
            return ConfigTransformer(subcommand)
        elif subcommand == UserCommandParser.info_command:
            return InfoTransformer(subcommand)
        elif subcommand == UserCommandParser.delete_command:
            return DeleteSessionTransformer(subcommand)
        elif subcommand == UserCommandParser.clean_up_command:
            return CleanUpTransformer(subcommand)
        elif subcommand == UserCommandParser.logs_command:
            return LogsTransformer(subcommand)
        elif subcommand == UserCommandParser.local_command:
            return PythonTransformer(subcommand)
        else:
            return NotSupportedTransformer(subcommand)

    def _load_magics_extension(self):
        register_magics_code = "%load_ext remotespark"
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark magics library.")
        self._logger.debug("Loaded magics.")

    def _register_auto_viz(self):
        register_auto_viz_code = """from remotespark.datawidgets.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(register_auto_viz_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self._logger.debug("Registered auto viz.")

    def _start_session(self):
        if not self._session_started:
            self._session_started = True

            add_session_code = "%spark add {} {} {} skip".format(
                self.client_name, self.session_language, self.connection_string)
            self._execute_cell(add_session_code, True, False, shutdown_if_error=True,
                               log_if_error="Failed to create a Livy session.")
            self._logger.debug("Added session.")

    def _delete_session(self):
        if self._session_started:
            code = "%spark cleanup"
            self._execute_cell_for_user(code, True, False)
            self._session_started = False

    def _get_configuration(self):
        """Returns (username, password, url). If there is an error (missing configuration),
           returns False."""
        try:
            credentials = getattr(conf, 'kernel_' + self.kernel_conf_name + '_credentials')()
            ret = (credentials['username'], credentials['password'], credentials['url'])

            # The URL has to be set in the configuration.
            assert(ret[2])

            return ret
        except (KeyError, AssertionError):
            message = "Please set configuration for 'kernel_{}_credentials' to initialize Kernel".format(
                self.kernel_conf_name)
            self._queue_fatal_error(message)
            return False

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        reply_content = self._execute_cell_for_user(code, silent, store_history, user_expressions, allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(log_if_error, error_from_reply)
                self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        return super(SparkKernelBase, self).do_execute(code, silent, store_history, user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _show_user_error(self, message):
        self._logger.error(message)
        self._ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self._logger.error(error)
        self._ipython_display.send_error(error)
        raise ValueError(self._fatal_error)
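Every transformer returned by _get_code_transformer must honor the five-value contract that do_execute unpacks. A hypothetical minimal transformer satisfying it (no error, do-nothing actions; assumes the same Constants class used above):

class NoOpTransformerSketch(object):
    # Hypothetical: returns (code_to_run, error_to_show, begin_action,
    # end_action, deletes_session) exactly as do_execute above expects.
    def __init__(self, subcommand):
        self.subcommand = subcommand

    def get_code_to_execute(self, session_started, connection_string,
                            force, output_var, command):
        return (command, None, Constants.do_nothing_action,
                Constants.do_nothing_action, False)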