Exemplo n.º 1
0
    def __init__(self, reader_writer):
        """Create the serializer around ``reader_writer``.

        :param reader_writer: object kept on the instance as
            ``_reader_writer``; an ``assert`` rejects ``None``.
        """
        assert reader_writer is not None
        self._reader_writer = reader_writer

        # Helpers built fresh for every instance.
        self.logger = Log("ClientManagerStateSerializer")
        self._ipython_display = IpythonDisplay()
Exemplo n.º 2
0
    def __init__(self, shell, data=None):
        """Build the magics object and its Spark controller.

        ``shell`` is forwarded to the parent constructor; ``data`` is accepted
        but not used here. When ``conf.serialize()`` is truthy the controller
        is rebuilt with a ``state.json`` path joined onto the value returned
        by ``get_magics_home_path()``.
        """
        # The parent constructor has to run first.
        super(SparkMagicBase, self).__init__(shell)

        self.logger = Log("SparkMagics")
        self.ipython_display = IpythonDisplay()
        # Default controller; it stays in place when serialization is off or
        # a KeyError interrupts the setup below.
        self.spark_controller = SparkController(self.ipython_display)

        try:
            if not conf.serialize():
                self.logger.debug("Serialization NOT enabled.")
            else:
                self.logger.debug("Serialization enabled.")

                home_path = get_magics_home_path()
                self.magics_home_path = home_path
                state_file = join_paths(home_path, "state.json")

                self.logger.debug("Will serialize to {}.".format(state_file))

                self.spark_controller = SparkController(
                    self.ipython_display, serialize_path=state_file)
        except KeyError:
            self.logger.error("Could not read env vars for serialization.")

        self.logger.debug("Initialized spark magics.")
Exemplo n.º 3
0
 def __init__(self, code, spark_events=None):
     """Store the dedented ``code`` and the event emitter to use.

     ``spark_events`` defaults to a new ``SparkEvents()`` when omitted.
     """
     super(Command, self).__init__()
     self.code = textwrap.dedent(code)
     self.logger = Log("Command")
     # Fall back to a fresh emitter only when the caller supplied none.
     self._spark_events = SparkEvents() if spark_events is None else spark_events
Exemplo n.º 4
0
class Command(ObjectWithGuid):
    """Code that is posted as a single statement through a session's HTTP client."""

    def __init__(self, code, spark_events=None):
        """Keep the dedented ``code`` and the event emitter.

        ``spark_events`` defaults to a new ``SparkEvents()`` when ``None``.
        """
        super(Command, self).__init__()
        self.code = textwrap.dedent(code)
        self.logger = Log(u"Command")
        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

    def __eq__(self, other):
        """Compare two commands by their ``code``.

        Objects without a ``code`` attribute yield ``NotImplemented`` so that
        e.g. ``command == None`` evaluates to ``False`` instead of raising
        ``AttributeError``.
        """
        if not hasattr(other, "code"):
            return NotImplemented
        return self.code == other.code

    def __ne__(self, other):
        """Return the negation of ``==``."""
        return not self == other

    def execute(self, session):
        """Post the code to ``session`` and return the statement output.

        A statement-execution start event is emitted first. An end event is
        emitted afterwards in both outcomes: on failure it carries the
        exception class name and message, and the exception is re-raised.

        Returns the ``(success, text)`` tuple built by
        ``_get_statement_output``.
        """
        events = self._spark_events
        events.emit_statement_execution_start_event(session.guid, session.kind, session.id, self.guid)
        # Stays -1 when the failure happens before a statement id is received.
        statement_id = -1
        try:
            session.wait_for_idle()
            data = {u"code": self.code}
            response = session.http_client.post_statement(session.id, data)
            statement_id = response[u'id']
            output = self._get_statement_output(session, statement_id)
        except Exception as e:
            events.emit_statement_execution_end_event(session.guid, session.kind, session.id,
                                                      self.guid, statement_id, False, e.__class__.__name__,
                                                      str(e))
            raise
        else:
            events.emit_statement_execution_end_event(session.guid, session.kind, session.id,
                                                      self.guid, statement_id, True, "", "")
            return output

    def _get_statement_output(self, session, statement_id):
        """Poll the statement until it stops running and return its result.

        Returns ``(True, text)`` for an ``ok`` output and
        ``(False, evalue + newline + traceback)`` for an ``error`` output.

        Raises ``LivyUnexpectedStatusException`` for any other output status.
        """
        while True:
            statement = session.http_client.get_statement(session.id, statement_id)
            status = statement[u"state"]

            self.logger.debug(u"Status of statement {} is {}.".format(statement_id, status))

            if status == u"running":
                # Not finished yet: wait, then poll again.
                session.sleep()
                continue

            statement_output = statement[u"output"]
            output_status = statement_output[u"status"]
            if output_status == u"ok":
                return (True, statement_output[u"data"][u"text/plain"])
            if output_status == u"error":
                return (False,
                        statement_output[u"evalue"] + u"\n" + u"".join(statement_output[u"traceback"]))
            raise LivyUnexpectedStatusException(u"Unknown output status from Livy: '{}'"
                                                .format(output_status))
Exemplo n.º 5
0
class EventsHandler:
    """Writes event details to a logger named ``EventsHandler``."""

    def __init__(self):
        self.logger = Log("EventsHandler")

    def handle_event(self, kwargs_list):
        """
        Log one line describing the event.

        ``kwargs_list`` is iterated as ``(key, value)`` pairs; each pair is
        rendered as ``key: value`` and the pieces are joined with commas.
        """
        rendered = []
        for name, value in kwargs_list:
            rendered.append("{}: {}".format(name, value))
        self.logger.info(",".join(rendered))
Exemplo n.º 6
0
    def __init__(self, ipython_display, serialize_path=None):
        """Create the controller and its client manager.

        Without ``serialize_path`` a plain ``ClientManager()`` is used;
        otherwise the manager receives a ``ClientManagerStateSerializer``
        wrapping a ``FileSystemReaderWriter`` for that path.
        """
        self.logger = Log("SparkController")
        self.ipython_display = ipython_display

        if serialize_path is None:
            self.client_manager = ClientManager()
            return

        reader_writer = FileSystemReaderWriter(serialize_path)
        state_serializer = ClientManagerStateSerializer(reader_writer)
        self.client_manager = ClientManager(state_serializer)
Exemplo n.º 7
0
class EventsHandler:
    """Forwards event details to its own logger."""

    def __init__(self):
        self.logger = Log("EventsHandler")

    def handle_event(self, kwargs_list):
        """
        Record the event through the logger.

        Every ``(key, value)`` pair in ``kwargs_list`` becomes ``key: value``;
        the comma-joined result is logged at info level.
        """
        formatted = ["{}: {}".format(k, v) for (k, v) in kwargs_list]
        message = ",".join(formatted)
        self.logger.info(message)
Exemplo n.º 8
0
    def __init__(self, endpoint, headers, retry_policy):
        """Remember the endpoint, headers and retry policy.

        ``verify_ssl`` is the negation of ``conf.ignore_ssl_errors()``. When
        verification is off, a debug message is logged and urllib3 warnings
        are disabled.
        """
        self._endpoint = endpoint
        self._headers = headers
        self._retry_policy = retry_policy
        self.logger = Log("ReliableHttpClient")

        ignore_ssl_errors = conf.ignore_ssl_errors()
        self.verify_ssl = not ignore_ssl_errors
        if ignore_ssl_errors:
            self.logger.debug(
                "ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks."
            )
            requests.packages.urllib3.disable_warnings()
Exemplo n.º 9
0
    def __init__(self, shell, data=None, spark_events=None):
        """Set up logger, display and controller, then emit a library-loaded event.

        ``shell`` goes to the parent constructor; ``data`` is accepted but not
        used here. ``spark_events`` defaults to a new ``SparkEvents()``.
        """
        # The parent constructor has to run first.
        super(SparkMagicBase, self).__init__(shell)

        self.logger = Log("SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        events = SparkEvents() if spark_events is None else spark_events
        events.emit_library_loaded_event()
Exemplo n.º 10
0
class SparkMagicBase(Magics):
    """Magics base holding a logger, a display and a ``SparkController``."""

    def __init__(self, shell, data=None):
        """Build the magics object and its Spark controller.

        ``shell`` is forwarded to the parent constructor; ``data`` is accepted
        but not used here. When ``conf.serialize()`` is truthy the controller
        is rebuilt with a ``state.json`` path joined onto the value returned
        by ``get_magics_home_path()``.
        """
        # The parent constructor has to run first.
        super(SparkMagicBase, self).__init__(shell)

        self.logger = Log("SparkMagics")
        self.ipython_display = IpythonDisplay()
        # Default controller, replaced below when serialization is enabled.
        self.spark_controller = SparkController(self.ipython_display)

        try:
            if not conf.serialize():
                self.logger.debug("Serialization NOT enabled.")
            else:
                self.logger.debug("Serialization enabled.")

                home_path = get_magics_home_path()
                self.magics_home_path = home_path
                state_file = join_paths(home_path, "state.json")

                self.logger.debug("Will serialize to {}.".format(state_file))

                self.spark_controller = SparkController(
                    self.ipython_display, serialize_path=state_file)
        except KeyError:
            self.logger.error("Could not read env vars for serialization.")

        self.logger.debug("Initialized spark magics.")

    def execute_sqlquery(self, sqlquery, session, output_var, quiet):
        """Run ``sqlquery`` through the controller and return its result.

        The result is also stored in ``shell.user_ns[output_var]`` when
        ``output_var`` is given. Returns ``None`` when ``quiet`` is truthy, or
        when a ``DataFrameParseException`` is raised (its ``out`` is sent to
        the display as an error).
        """
        try:
            result = self.spark_controller.run_cell_sql(sqlquery, session)
            if output_var is not None:
                self.shell.user_ns[output_var] = result
        except DataFrameParseException as parse_error:
            self.ipython_display.send_error(parse_error.out)
            return None
        return None if quiet else result

    @staticmethod
    def print_endpoint_info(info_sessions):
        """Print each entry of ``info_sessions`` on its own indented line."""
        indented = []
        for entry in info_sessions:
            indented.append("        {}".format(entry))
        listing = "\n".join(indented)
        print("""Info for endpoint:
    Sessions:
{}
""".format(listing))
Exemplo n.º 11
0
    def __init__(self, shell, data=None):
        """Initialise logger, display and the Spark controller.

        ``shell`` is passed to the parent constructor; ``data`` is accepted
        but unused here. If ``conf.serialize()`` is truthy, the controller is
        recreated with ``serialize_path`` pointing at ``state.json`` joined
        onto the result of ``get_magics_home_path()``.
        """
        # The parent constructor has to run first.
        super(SparkMagicBase, self).__init__(shell)

        self.logger = Log("SparkMagics")
        self.ipython_display = IpythonDisplay()
        # Non-serializing controller used unless the block below replaces it.
        self.spark_controller = SparkController(self.ipython_display)

        try:
            serialization_on = conf.serialize()
            if not serialization_on:
                self.logger.debug("Serialization NOT enabled.")
            else:
                self.logger.debug("Serialization enabled.")

                self.magics_home_path = get_magics_home_path()
                target = join_paths(self.magics_home_path, "state.json")

                self.logger.debug("Will serialize to {}.".format(target))

                self.spark_controller = SparkController(self.ipython_display, serialize_path=target)
        except KeyError:
            self.logger.error("Could not read env vars for serialization.")

        self.logger.debug("Initialized spark magics.")
Exemplo n.º 12
0
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        """Initialise the kernel.

        The implementation/language arguments and ``session_language`` are
        stored on the instance before the parent constructor runs with
        ``kwargs``. ``user_code_parser`` defaults to a new ``UserCodeParser()``.
        A truthy ``testing`` entry in ``kwargs`` skips loading the magics
        extension, the language change and auto-viz registration.
        """
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        # The placeholder was missing, so the session language never made it
        # into the logger name.
        self.logger = Log("{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()
    def __init__(self, reader_writer):
        """Keep ``reader_writer`` and create the logger and display helpers.

        ``reader_writer`` must not be ``None`` (enforced with an ``assert``).
        """
        assert reader_writer is not None
        self._reader_writer = reader_writer

        self.logger = Log("ClientManagerStateSerializer")
        self._ipython_display = IpythonDisplay()
Exemplo n.º 14
0
 def __init__(self, code, spark_events=None):
     """Keep the dedented ``code`` and an event emitter.

     A new ``SparkEvents()`` is created when ``spark_events`` is ``None``.
     """
     super(Command, self).__init__()
     dedented = textwrap.dedent(code)
     self.code = dedented
     self.logger = Log(u"Command")
     self._spark_events = SparkEvents() if spark_events is None else spark_events
Exemplo n.º 15
0
class SparkMagicBase(Magics):
    """Magics base that owns a logger, a display and a ``SparkController``."""

    def __init__(self, shell, data=None):
        """Initialise logger, display and the Spark controller.

        ``shell`` is passed to the parent constructor; ``data`` is accepted
        but unused here. If ``conf.serialize()`` is truthy, the controller is
        recreated with ``serialize_path`` pointing at ``state.json`` joined
        onto the result of ``get_magics_home_path()``.
        """
        # The parent constructor has to run first.
        super(SparkMagicBase, self).__init__(shell)

        self.logger = Log("SparkMagics")
        self.ipython_display = IpythonDisplay()
        # Non-serializing controller used unless the block below replaces it.
        self.spark_controller = SparkController(self.ipython_display)

        try:
            serialization_on = conf.serialize()
            if not serialization_on:
                self.logger.debug("Serialization NOT enabled.")
            else:
                self.logger.debug("Serialization enabled.")

                self.magics_home_path = get_magics_home_path()
                target = join_paths(self.magics_home_path, "state.json")

                self.logger.debug("Will serialize to {}.".format(target))

                self.spark_controller = SparkController(self.ipython_display, serialize_path=target)
        except KeyError:
            self.logger.error("Could not read env vars for serialization.")

        self.logger.debug("Initialized spark magics.")

    def execute_sqlquery(self, sqlquery, session, output_var, quiet):
        """Run ``sqlquery`` via the controller.

        Stores the result under ``output_var`` in ``shell.user_ns`` when a name
        is given. Returns the result, or ``None`` when ``quiet`` is truthy or a
        ``DataFrameParseException`` occurred (its ``out`` is sent to the
        display as an error).
        """
        try:
            frame = self.spark_controller.run_cell_sql(sqlquery, session)
            if output_var is not None:
                self.shell.user_ns[output_var] = frame
        except DataFrameParseException as err:
            self.ipython_display.send_error(err.out)
            return None

        if quiet:
            return None
        return frame

    @staticmethod
    def print_endpoint_info(info_sessions):
        """Print the given session descriptions, one indented line each."""
        listing = "\n".join("        {}".format(item) for item in info_sessions)
        print("""Info for endpoint:
    Sessions:
{}
""".format(listing))
    def __init__(self, client_factory, reader_writer):
        """Keep the client factory and reader/writer and create the logger.

        Both arguments must not be ``None`` (enforced with ``assert``).
        """
        assert client_factory is not None
        assert reader_writer is not None

        self._client_factory = client_factory
        self._reader_writer = reader_writer

        self.logger = Log("ClientManagerStateSerializer")
Exemplo n.º 17
0
    def __init__(self, http_client, properties, ipython_display,
                 session_id=-1, sql_created=None, spark_events=None):
        """Validate the inputs and initialise the session's state.

        ``properties`` must contain a ``"kind"`` entry (asserted); its
        lower-cased value has to be listed in
        ``constants.SESSION_KINDS_SUPPORTED``. A ``session_id`` of ``-1`` gives
        the not-started status and forces ``created_sql_context`` to ``False``;
        any other id gives the busy status.

        Raises ``BadUserDataException`` for an unsupported kind, or when
        ``sql_created`` is ``True`` while ``session_id`` is ``-1``.
        """
        super(LivySession, self).__init__()
        assert "kind" in properties.keys()
        requested_kind = properties["kind"]
        self.properties = properties
        self.ipython_display = ipython_display

        self._spark_events = SparkEvents() if spark_events is None else spark_events

        # Timing settings are read from configuration and must be positive.
        status_sleep = conf.status_sleep_seconds()
        statement_sleep = conf.statement_sleep_seconds()
        idle_timeout = conf.wait_for_idle_timeout_seconds()

        assert status_sleep > 0
        assert statement_sleep > 0
        assert idle_timeout > 0

        is_new_session = session_id == -1
        if is_new_session and sql_created is True:
            raise BadUserDataException("Cannot indicate sql state without session id.")

        self.logger = Log("LivySession")

        kind = requested_kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            raise BadUserDataException("Session of kind '{}' not supported. Session must be of kinds {}."
                                       .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

        self.status = (constants.NOT_STARTED_SESSION_STATUS if is_new_session
                       else constants.BUSY_SESSION_STATUS)

        self._logs = ""
        self._http_client = http_client
        self._status_sleep_seconds = status_sleep
        self._statement_sleep_seconds = statement_sleep
        self._wait_for_idle_timeout_seconds = idle_timeout

        self.kind = kind
        self.id = session_id
        # A session without an id cannot have a SQL context yet.
        self.created_sql_context = False if is_new_session else sql_created
Exemplo n.º 18
0
    def __init__(self, ipython_display, serialize_path=None):
        """Create the controller, its client factory and its client manager.

        With a ``serialize_path`` the manager is given a
        ``ClientManagerStateSerializer`` built from the factory and a
        ``FileSystemReaderWriter`` for that path; otherwise a plain
        ``ClientManager()`` is used.
        """
        self.logger = Log("SparkController")
        self.ipython_display = ipython_display
        self.client_factory = LivyClientFactory()

        if serialize_path is None:
            self.client_manager = ClientManager()
            return

        reader_writer = FileSystemReaderWriter(serialize_path)
        state_serializer = ClientManagerStateSerializer(self.client_factory, reader_writer)
        self.client_manager = ClientManager(state_serializer)
Exemplo n.º 19
0
class Command(ObjectWithGuid):
    """Code that is posted as a single statement through a session's HTTP client."""

    def __init__(self, code):
        """Keep the dedented ``code`` and create the logger."""
        super(Command, self).__init__()
        self.code = textwrap.dedent(code)
        self.logger = Log("Command")

    def __eq__(self, other):
        """Compare two commands by their ``code``.

        Objects without a ``code`` attribute yield ``NotImplemented`` so that
        e.g. ``command == None`` evaluates to ``False`` instead of raising
        ``AttributeError``.
        """
        if not hasattr(other, "code"):
            return NotImplemented
        return self.code == other.code

    def __ne__(self, other):
        """Return the negation of ``==``."""
        return not self == other

    def execute(self, session):
        """Wait for ``session`` to be idle, post the code and return its output.

        Returns the ``(success, text)`` tuple built by
        ``_get_statement_output``.
        """
        session.wait_for_idle()
        data = {"code": self.code}
        response = session.http_client.post_statement(session.id, data)
        statement_id = response['id']
        return self._get_statement_output(session, statement_id)

    def _get_statement_output(self, session, statement_id):
        """Poll the statement until it stops running and return its result.

        Returns ``(True, text)`` for an ``ok`` output and
        ``(False, evalue + newline + traceback)`` for an ``error`` output.

        Raises ``ValueError`` for any other output status.
        """
        while True:
            statement = session.http_client.get_statement(session.id, statement_id)
            status = statement["state"]

            self.logger.debug("Status of statement {} is {}.".format(statement_id, status))

            if status == "running":
                # Not finished yet: wait, then poll again.
                session.sleep()
                continue

            statement_output = statement["output"]
            output_status = statement_output["status"]
            if output_status == "ok":
                return (True, statement_output["data"]["text/plain"])
            if output_status == "error":
                return (False,
                        statement_output["evalue"] + "\n" + "".join(statement_output["traceback"]))
            raise ValueError("Unknown output status: '{}'".format(output_status))
Exemplo n.º 20
0
    def __init__(self, endpoint, headers, retry_policy):
        """Store endpoint, headers and retry policy and decide on SSL verification.

        ``verify_ssl`` is the negation of ``conf.ignore_ssl_errors()``; when it
        is off, a debug message is logged and urllib3 warnings are disabled.
        """
        self._endpoint = endpoint
        self._headers = headers
        self._retry_policy = retry_policy
        self.logger = Log("ReliableHttpClient")

        skip_verification = conf.ignore_ssl_errors()
        self.verify_ssl = not skip_verification
        if skip_verification:
            self.logger.debug("ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks.")
            requests.packages.urllib3.disable_warnings()
Exemplo n.º 21
0
    def __init__(self, serializer=None):
        """Create an empty client registry, optionally restored from ``serializer``.

        When a serializer is given, the periodic-serialization settings are
        read from ``conf``, every ``(name, client)`` pair returned by
        ``serializer.deserialize_state()`` is added, and periodic
        serialization is started if configured.
        """
        has_serializer = serializer is not None
        if has_serializer:
            periodic = conf.serialize_periodically()
            period_seconds = conf.serialize_period_seconds()
        else:
            # Defaults used when nothing is serialized.
            periodic, period_seconds = False, 3

        self.logger = Log("ClientManager")

        self._livy_clients = {}
        self._serializer = serializer
        self._serialize_timer = None

        if not has_serializer:
            return

        for name, client in serializer.deserialize_state():
            self.add_client(name, client)

        if periodic:
            self._serialize_state_periodically(period_seconds)
    def __init__(self, url, headers, username, password, retry_policy):
        """Store the connection details and decide on SSL verification.

        ``url`` is stored without trailing slashes. Authentication is skipped
        (``_do_not_authenticate``) only when both ``username`` and ``password``
        are empty strings. ``verify_ssl`` is the negation of
        ``conf.ignore_ssl_errors()``.
        """
        self._url = url.rstrip("/")
        self._headers = headers
        self._username = username
        self._password = password
        self._retry_policy = retry_policy
        self.logger = Log("ReliableHttpClient")

        self._do_not_authenticate = self._username == "" and self._password == ""

        self.verify_ssl = not conf.ignore_ssl_errors()
        # The warning belongs to the case where verification is turned OFF;
        # the condition used to be inverted.
        if not self.verify_ssl:
            self.logger.debug("ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks.")
Exemplo n.º 23
0
    def __init__(self,
                 implementation,
                 implementation_version,
                 language,
                 language_version,
                 language_info,
                 session_language,
                 user_code_parser=None,
                 **kwargs):
        """Initialise the kernel.

        The implementation/language arguments and ``session_language`` are
        stored on the instance before the parent constructor runs with
        ``kwargs``. ``user_code_parser`` defaults to a new ``UserCodeParser()``.
        A truthy ``testing`` entry in ``kwargs`` skips loading the magics
        extension, the language change and auto-viz registration.
        """
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        # The placeholder was missing, so the session language never made it
        # into the logger name.
        self.logger = Log("{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()
Exemplo n.º 24
0
    def __init__(self, http_client, properties, ipython_display,
                 session_id="-1", sql_created=None):
        """Validate the inputs and build the session state.

        ``properties`` must contain a ``"kind"`` entry (asserted); its
        lower-cased value has to be listed in
        ``constants.SESSION_KINDS_SUPPORTED``. A ``session_id`` of ``"-1"``
        gives the not-started status and a ``sql_created`` of ``False``; any
        other id gives the busy status.

        Raises ``ValueError`` for an unsupported kind, or when ``sql_created``
        is ``True`` while ``session_id`` is ``"-1"``.
        """
        assert "kind" in properties.keys()
        requested_kind = properties["kind"]
        self.properties = properties
        self.ipython_display = ipython_display

        # Timing settings are read from configuration and must be positive.
        status_sleep = conf.status_sleep_seconds()
        statement_sleep = conf.statement_sleep_seconds()
        sql_context_timeout = conf.create_sql_context_timeout_seconds()

        assert status_sleep > 0
        assert statement_sleep > 0
        assert sql_context_timeout > 0

        is_new_session = session_id == "-1"
        if is_new_session and sql_created is True:
            raise ValueError("Cannot indicate sql state without session id.")

        self.logger = Log("LivySession")

        kind = requested_kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            raise ValueError("Session of kind '{}' not supported. Session must be of kinds {}."
                             .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

        if is_new_session:
            self.status = constants.NOT_STARTED_SESSION_STATUS
            # A session without an id cannot have a SQL context yet.
            sql_created = False
        else:
            self.status = constants.BUSY_SESSION_STATUS

        self._logs = ""
        self._http_client = http_client
        self._status_sleep_seconds = status_sleep
        self._statement_sleep_seconds = statement_sleep
        self._create_sql_context_timeout_seconds = sql_context_timeout

        connection_string = self._http_client.connection_string
        self._state = LivySessionState(session_id, connection_string,
                                       kind, sql_created)
Exemplo n.º 25
0
class SparkMagicBase(Magics):
    """Magics base holding a logger, a display and a ``SparkController``."""

    def __init__(self, shell, data=None, spark_events=None):
        """Set up logger, display and controller, then emit a library-loaded event.

        ``shell`` goes to the parent constructor; ``data`` is accepted but not
        used here. ``spark_events`` defaults to a new ``SparkEvents()``.
        """
        # The parent constructor has to run first.
        super(SparkMagicBase, self).__init__(shell)

        self.logger = Log("SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        events = SparkEvents() if spark_events is None else spark_events
        events.emit_library_loaded_event()

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        """Build a query from ``cell`` and run it through the controller.

        The result is stored in ``shell.user_ns[output_var]`` when
        ``output_var`` is given. Returns ``None`` if ``quiet`` is truthy,
        otherwise the result.
        """
        query = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        result = self.spark_controller.run_sqlquery(query, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = result
        return None if quiet else result

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        """Wrap the arguments in a ``SQLQuery``."""
        query = SQLQuery(cell, samplemethod, maxrows, samplefraction)
        return query

    @staticmethod
    def print_endpoint_info(info_sessions):
        """Print each entry of ``info_sessions`` on its own indented line."""
        indented = []
        for entry in info_sessions:
            indented.append("        {}".format(entry))
        listing = "\n".join(indented)
        print("""Info for endpoint:
    Sessions:
{}
""".format(listing))
Exemplo n.º 26
0
    def __init__(self, http_client, properties, ipython_display,
                 session_id=-1, sql_created=None, spark_events=None):
        """Validate the inputs and initialise the session's state.

        ``properties`` must contain a ``kind`` entry (asserted); its
        lower-cased value has to be listed in
        ``constants.SESSION_KINDS_SUPPORTED``. A ``session_id`` of ``-1`` gives
        the not-started status and forces ``created_sql_context`` to ``False``;
        any other id gives the busy status.

        Raises ``BadUserDataException`` for an unsupported kind, or when
        ``sql_created`` is ``True`` while ``session_id`` is ``-1``.
        """
        super(LivySession, self).__init__()
        assert u"kind" in properties.keys()
        raw_kind = properties[u"kind"]
        self.properties = properties
        self.ipython_display = ipython_display

        self._spark_events = SparkEvents() if spark_events is None else spark_events

        # Timing settings are read from configuration and must be positive.
        status_sleep = conf.status_sleep_seconds()
        statement_sleep = conf.statement_sleep_seconds()
        idle_timeout = conf.wait_for_idle_timeout_seconds()

        assert status_sleep > 0
        assert statement_sleep > 0
        assert idle_timeout > 0

        not_started = session_id == -1
        if not_started and sql_created is True:
            raise BadUserDataException(u"Cannot indicate sql state without session id.")

        self.logger = Log(u"LivySession")

        kind = raw_kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            raise BadUserDataException(u"Session of kind '{}' not supported. Session must be of kinds {}."
                                       .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

        if not_started:
            self.status = constants.NOT_STARTED_SESSION_STATUS
            # A session without an id cannot have a SQL context yet.
            sql_created = False
        else:
            self.status = constants.BUSY_SESSION_STATUS

        self._app_id = None
        self._logs = u""
        self._http_client = http_client
        self._status_sleep_seconds = status_sleep
        self._statement_sleep_seconds = statement_sleep
        self._wait_for_idle_timeout_seconds = idle_timeout

        self.kind = kind
        self.id = session_id
        self.created_sql_context = sql_created
Exemplo n.º 27
0
    def __init__(self, serializer=None):
        """Set up the client registry and, if given, restore it via ``serializer``.

        With a serializer, the periodic-serialization settings come from
        ``conf``, each ``(name, client)`` pair from
        ``serializer.deserialize_state()`` is registered with ``add_client``,
        and the periodic serialization is started when configured.
        """
        # Defaults that apply when there is no serializer.
        periodic = False
        period_seconds = 3
        use_serializer = serializer is not None

        if use_serializer:
            periodic = conf.serialize_periodically()
            period_seconds = conf.serialize_period_seconds()

        self.logger = Log("ClientManager")

        self._livy_clients = {}
        self._serializer = serializer
        self._serialize_timer = None

        if use_serializer:
            restored = self._serializer.deserialize_state()
            for name, client in restored:
                self.add_client(name, client)

            if periodic:
                self._serialize_state_periodically(period_seconds)
Exemplo n.º 28
0
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 kernel_conf_name, session_language, client_name, **kwargs):
        """Initialise the kernel and, outside of testing, its connection.

        All named arguments are stored on the instance before the parent
        constructor runs with ``kwargs``. Unless ``kwargs`` has a truthy
        ``testing`` entry, the configuration is read, the connection string is
        built, the magics extension is loaded and auto-viz is registered when
        ``conf.use_auto_viz()`` is truthy.
        """
        # Attributes Jupyter requires the kernel to override.
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Kernel-specific overrides.
        self.kernel_conf_name = kernel_conf_name
        self.session_language = session_language
        self.client_name = client_name

        super(SparkKernelBase, self).__init__(**kwargs)

        self._logger = Log(self.client_name)
        self._session_started = False
        self._fatal_error = None
        self._ipython_display = IpythonDisplay()

        self.user_command_parser = UserCommandParser()

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if kwargs.get("testing", False):
            return

        configuration = self._get_configuration()
        if not configuration:
            # _get_configuration() sets the error for us so we can just return now.
            # The kernel is not in a good state and all do_execute calls will
            # fail with the fatal error.
            return

        username, password, url = configuration
        self.connection_string = get_connection_string(url, username, password)
        self._load_magics_extension()
        if conf.use_auto_viz():
            self._register_auto_viz()
Exemplo n.º 29
0
    def __init__(self, ipython_display, http_client, session_id, sql_created, properties):
        """Validate the inputs and build the session state.

        ``properties`` must contain a ``"kind"`` entry (asserted); its
        lower-cased value has to be listed in
        ``Constants.session_kinds_supported``. A ``session_id`` of ``"-1"``
        gives the not-started status and a ``sql_created`` of ``False``; any
        other id gives the busy status.

        Raises ``ValueError`` for an unsupported kind, or when ``sql_created``
        is ``True`` while ``session_id`` is ``"-1"``.
        """
        assert "kind" in properties.keys()
        requested_kind = properties["kind"]
        self.properties = properties
        self.ipython_display = ipython_display

        # Timing settings are read from configuration and must be positive.
        status_sleep = conf.status_sleep_seconds()
        statement_sleep = conf.statement_sleep_seconds()
        sql_context_timeout = conf.create_sql_context_timeout_seconds()

        assert status_sleep > 0
        assert statement_sleep > 0
        assert sql_context_timeout > 0

        is_new_session = session_id == "-1"
        if is_new_session and sql_created is True:
            raise ValueError("Cannot indicate sql state without session id.")

        self.logger = Log("LivySession")

        kind = requested_kind.lower()
        if kind not in Constants.session_kinds_supported:
            raise ValueError("Session of kind '{}' not supported. Session must be of kinds {}."
                             .format(kind, ", ".join(Constants.session_kinds_supported)))

        if is_new_session:
            self._status = Constants.not_started_session_status
            # A session without an id cannot have a SQL context yet.
            sql_created = False
        else:
            self._status = Constants.busy_session_status

        self._logs = ""
        self._http_client = http_client
        self._status_sleep_seconds = status_sleep
        self._statement_sleep_seconds = statement_sleep
        self._create_sql_context_timeout_seconds = sql_context_timeout

        connection_string = http_client.connection_string
        self._state = LivySessionState(session_id, connection_string,
                                       kind, sql_created)
Exemplo n.º 30
0
class SparkController(object):
    """Facade over a ``ClientManager`` plus helpers that talk to an endpoint."""

    def __init__(self, ipython_display, serialize_path=None):
        """Create the controller and its client manager.

        Without ``serialize_path`` a plain ``ClientManager()`` is used;
        otherwise the manager receives a ``ClientManagerStateSerializer``
        wrapping a ``FileSystemReaderWriter`` for that path.
        """
        self.logger = Log("SparkController")
        self.ipython_display = ipython_display

        if serialize_path is None:
            self.client_manager = ClientManager()
            return

        reader_writer = FileSystemReaderWriter(serialize_path)
        self.client_manager = ClientManager(
            ClientManagerStateSerializer(reader_writer))

    def get_logs(self, client_name=None):
        """Return ``get_logs()`` of the named (or default) client."""
        return self.get_client_by_name_or_default(client_name).get_logs()

    def run_cell(self, cell, client_name=None):
        """Execute ``cell`` on the named (or default) client."""
        return self.get_client_by_name_or_default(client_name).execute(cell)

    def run_cell_sql(self, sqlquery, client_name=None):
        """Execute ``sqlquery`` as SQL on the named (or default) client."""
        return self.get_client_by_name_or_default(client_name).execute_sql(
            sqlquery)

    def get_all_sessions_endpoint(self, connection_string):
        """Return session objects for every session listed at ``/sessions``.

        All sessions are created first; their status is refreshed in a second
        pass.
        """
        client = self._http_client_from_connection_string(connection_string)
        response = client.get("/sessions", [200])

        found = []
        for entry in response.json()["sessions"]:
            found.append(
                self._create_livy_session(connection_string,
                                          {"kind": entry["kind"]},
                                          self.ipython_display, entry["id"]))

        for session in found:
            session._refresh_status()
        return found

    def get_all_sessions_endpoint_info(self, connection_string):
        """Return ``str()`` of every session found at the endpoint."""
        return [
            str(session)
            for session in self.get_all_sessions_endpoint(connection_string)
        ]

    def cleanup(self):
        """Ask the client manager to clean up all clients."""
        self.client_manager.clean_up_all()

    def cleanup_endpoint(self, connection_string):
        """Delete every session found at the endpoint."""
        sessions = self.get_all_sessions_endpoint(connection_string)
        for session in sessions:
            session.delete()

    def delete_session_by_name(self, name):
        """Delete the managed client registered under ``name``."""
        self.client_manager.delete_client(name)

    def delete_session_by_id(self, connection_string, session_id):
        """Delete the endpoint session with ``session_id``; a 404 is ignored."""
        client = self._http_client_from_connection_string(connection_string)
        response = client.get("/sessions/{}".format(session_id), [200, 404])
        if response.status_code == 404:
            return

        properties = {"kind": response.json()["kind"]}
        session = self._create_livy_session(connection_string, properties,
                                            self.ipython_display, session_id,
                                            False)
        session.delete()

    def add_session(self, name, connection_string, skip_if_exists, properties):
        """Start a new session and register a client for it under ``name``.

        Does nothing but log when ``skip_if_exists`` is truthy and ``name`` is
        already in the manager's session list.
        """
        if skip_if_exists and name in self.client_manager.get_sessions_list():
            message = "Skipping {} because it already exists in list of sessions.".format(
                name)
            self.logger.debug(message)
            return

        session = self._create_livy_session(connection_string, properties,
                                            self.ipython_display)
        session.start()

        client = self._create_livy_client(session)
        self.client_manager.add_client(name, client)
        client.start()

    def get_session_id_for_client(self, name):
        """Return the manager's session id for the client ``name``."""
        return self.client_manager.get_session_id_for_client(name)

    def get_client_keys(self):
        """Return the manager's session list."""
        return self.client_manager.get_sessions_list()

    def get_manager_sessions_str(self):
        """Return the manager's sessions info."""
        return self.client_manager.get_sessions_info()

    def get_client_by_name_or_default(self, client_name):
        """Return the client for the lower-cased ``client_name``.

        ``None`` selects whatever ``get_any_client()`` returns.
        """
        if client_name is not None:
            return self.client_manager.get_client(client_name.lower())
        return self.client_manager.get_any_client()

    def get_managed_clients(self):
        """Return the manager's ``livy_clients``."""
        return self.client_manager.livy_clients

    @staticmethod
    def _create_livy_session(*args, **kwargs):
        """Build a ``LivySession`` via ``from_connection_string``."""
        return LivySession.from_connection_string(*args, **kwargs)

    @staticmethod
    def _http_client_from_connection_string(connection_string):
        """Build a ``LivyReliableHttpClient`` via ``from_connection_string``."""
        return LivyReliableHttpClient.from_connection_string(connection_string)

    @staticmethod
    def _create_livy_client(session):
        """Wrap ``session`` in a ``LivyClient``."""
        return LivyClient(session)
Exemplo n.º 31
0
 def __init__(self, code):
     """Keep the dedented ``code`` and create the logger."""
     super(Command, self).__init__()
     dedented = textwrap.dedent(code)
     self.code = dedented
     self.logger = Log("Command")
Exemplo n.º 32
0
class ClientManager(object):
    """Livy client manager.

    Keeps Livy clients in a dict keyed by session name. When a serializer is
    supplied, saved clients are loaded on construction and the current
    clients can be written back through it.
    """
    def __init__(self, serializer=None):
        """Create the manager and, if a serializer is given, restore its state.

        Args:
            serializer: Optional object exposing ``deserialize_state()``,
                iterated as ``(name, client)`` pairs, and
                ``serialize_state(clients)``. ``None`` disables persistence.
        """
        serialize_periodically = False
        serialize_period = 3

        # Serialization settings are only read when persistence is enabled.
        if serializer is not None:
            serialize_periodically = conf.serialize_periodically()
            serialize_period = conf.serialize_period_seconds()

        self.logger = Log("ClientManager")

        self._livy_clients = dict()
        self._serializer = serializer
        self._serialize_timer = None

        if self._serializer is not None:
            for (name, client) in self._serializer.deserialize_state():
                self.add_client(name, client)

            if serialize_periodically:
                self._serialize_state_periodically(serialize_period)

    def _serialize_state_periodically(self, serialize_period):
        """Start a timer that calls ``_serialize_state`` after ``serialize_period``.

        NOTE(review): if ``Timer`` is ``threading.Timer`` this fires once, not
        repeatedly, despite the method name -- confirm the intended behavior.
        """
        self.logger.debug("Starting state serialize timer.")

        self._serialize_timer = Timer(serialize_period, self._serialize_state)
        self._serialize_timer.start()

    def _serialize_state(self):
        """Hand the current name-to-client dict to the serializer."""
        self._serializer.serialize_state(self._livy_clients)

    @property
    def livy_clients(self):
        """The underlying name-to-client dict (not a copy)."""
        return self._livy_clients

    def get_sessions_list(self):
        """Return the registered session names as a new list."""
        return list(self._livy_clients)

    def get_sessions_info(self):
        """Return one ``"Name: <name>\\t<str(client)>"`` string per client."""
        return [
            "Name: {}\t{}".format(name, str(client))
            for name, client in self._livy_clients.items()
        ]

    def add_client(self, name, livy_client):
        """Register ``livy_client`` under ``name``.

        Raises:
            ValueError: If ``name`` is already registered.
        """
        # Test the dict directly instead of building a list of names first.
        if name in self._livy_clients:
            raise ValueError(
                "Session with name '{}' already exists. Please delete the session"
                " first if you intend to replace it.".format(name))

        self._livy_clients[name] = livy_client

    def get_any_client(self):
        """Return the only registered client.

        Raises:
            AssertionError: If there are no clients, or more than one (the
                caller then has to choose by name).
        """
        number_of_sessions = len(self._livy_clients)
        if number_of_sessions == 1:
            return next(iter(self._livy_clients.values()))
        elif number_of_sessions == 0:
            raise AssertionError(
                "You need to have at least 1 client created to execute commands."
            )
        else:
            raise AssertionError(
                "Please specify the client to use. Possible sessions are {}".
                format(self.get_sessions_list()))

    def get_client(self, name):
        """Return the client registered under ``name``.

        Raises:
            ValueError: If ``name`` is not registered.
        """
        if name in self._livy_clients:
            return self._livy_clients[name]
        raise ValueError(
            "Could not find '{}' session in list of saved sessions. Possible sessions are {}"
            .format(name, self.get_sessions_list()))

    def get_session_id_for_client(self, name):
        """Return the ``session_id`` of the client named ``name``, or ``None``
        when no such client is registered."""
        if name in self._livy_clients:
            return self._livy_clients[name].session_id
        return None

    def delete_client(self, name):
        """Close and unregister the client named ``name``.

        Raises:
            ValueError: If ``name`` is not registered.
        """
        self._remove_session(name)

    def clean_up_all(self):
        """Close and unregister every client, then serialize the (now empty)
        state if a serializer is configured."""
        # Iterate over a snapshot: _remove_session mutates the dict.
        for name in self.get_sessions_list():
            self._remove_session(name)

        if self._serializer is not None:
            self._serialize_state()

    def _remove_session(self, name):
        """Call ``close_session()`` on the named client and drop it.

        Raises:
            ValueError: If ``name`` is not registered.
        """
        if name in self._livy_clients:
            # Close first; the entry is only dropped if closing did not raise.
            self._livy_clients[name].close_session()
            del self._livy_clients[name]
        else:
            raise ValueError(
                "Could not find '{}' session in list of saved sessions. Possible sessions are {}"
                .format(name, self.get_sessions_list()))
Exemplo n.º 33
0
class ReliableHttpClient(object):
    """Http client that is reliable in its requests. Uses requests library.

    Requests are retried according to the supplied retry policy until the
    response status is one of the accepted codes or the policy gives up.
    """

    def __init__(self, endpoint, headers, retry_policy):
        """Store the request settings.

        Args:
            endpoint: Object read for ``url``, ``authenticate``, ``username``
                and ``password``.
            headers: Headers sent with every request.
            retry_policy: Object providing ``should_retry(status, error,
                retry_count)`` and ``seconds_to_sleep(retry_count)``.
        """
        self._endpoint = endpoint
        self._headers = headers
        self._retry_policy = retry_policy
        self.logger = Log("ReliableHttpClient")

        self.verify_ssl = not conf.ignore_ssl_errors()
        if not self.verify_ssl:
            self.logger.debug("ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks.")
            requests.packages.urllib3.disable_warnings()

    def compose_url(self, relative_url):
        """Join the endpoint url and ``relative_url`` with exactly one slash,
        dropping any leading/trailing slashes from ``relative_url``."""
        return self._endpoint.url + "/" + relative_url.strip("/")

    def get(self, relative_url, accepted_status_codes):
        """Sends a get request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes, requests.get)

    def post(self, relative_url, accepted_status_codes, data):
        """Sends a post request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes, requests.post, data)

    def delete(self, relative_url, accepted_status_codes):
        """Sends a delete request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes, requests.delete)

    def _send_request(self, relative_url, accepted_status_codes, function, data=None):
        """Resolve ``relative_url`` against the endpoint and send the request
        with the retry counter starting at zero."""
        return self._send_request_helper(self.compose_url(relative_url), accepted_status_codes, function, data, 0)

    def _send_request_helper(self, url, accepted_status_codes, function, data, retry_count):
        """Call ``function(url, ...)`` until it yields an accepted status.

        Args:
            url: Absolute url to request.
            accepted_status_codes: Container of status codes treated as success.
            function: Callable with the ``requests.get``/``post``/``delete``
                calling convention.
            data: Optional payload; JSON-encoded and sent as ``data`` when not
                ``None``.
            retry_count: Number of retries already performed.

        Returns:
            The first response whose status code is accepted.

        Raises:
            HttpClientException: When a request fails or returns an unaccepted
                status and the retry policy declines to retry.
        """
        # The keyword arguments are the same for every attempt, so build them
        # once instead of repeating the call for each auth/data combination.
        request_kwargs = {"headers": self._headers, "verify": self.verify_ssl}
        if self._endpoint.authenticate:
            request_kwargs["auth"] = (self._endpoint.username, self._endpoint.password)
        if data is not None:
            request_kwargs["data"] = json.dumps(data)

        while True:
            try:
                r = function(url, **request_kwargs)
            except requests.exceptions.RequestException as e:
                error = True
                r = None
                status = None

                self.logger.error("Request to '{}' failed with '{}'".format(url, e))
            else:
                error = False
                status = r.status_code

            if error or status not in accepted_status_codes:
                if self._retry_policy.should_retry(status, error, retry_count):
                    sleep(self._retry_policy.seconds_to_sleep(retry_count))
                    retry_count += 1
                    continue
                else:
                    raise HttpClientException("Invalid status code '{}' or error '{}' from {}"
                                              .format(status, error, url))
            return r
class ClientManagerStateSerializer(object):
    """Livy client manager state serializer.

    Reads and writes the set of named clients as a single JSON document of
    the form ``{"clients": [...]}`` through a reader/writer object.
    """

    def __init__(self, reader_writer):
        """Store the reader/writer used for persistence.

        Args:
            reader_writer: Object providing ``read_lines()`` and
                ``overwrite_with_line(text)``. Must not be ``None``.
        """
        assert reader_writer is not None

        self.logger = Log("ClientManagerStateSerializer")
        self._ipython_display = IpythonDisplay()

        self._reader_writer = reader_writer

    def deserialize_state(self):
        """Rebuild clients from the stored state.

        Returns:
            A list of ``(name, client)`` tuples, one per stored session whose
            status is not final. Sessions in a final status, or whose status
            lookup raises ``ValueError``/``ConnectionError``, are logged and
            skipped. An empty list is returned when nothing is stored.
        """
        self.logger.debug("Deserializing state.")

        clients_to_return = []

        lines = self._reader_writer.read_lines()
        line = ''.join(lines).strip()

        if line != '':
            self.logger.debug("Read content. Converting to JSON.")
            state = json.loads(line)
            clients = state["clients"]

            for client in clients:
                # Ignore version for now
                name = client["name"]
                session_id = client["id"]
                sql_context_created = client["sqlcontext"]
                kind = client["kind"].lower()
                connection_string = client["connectionstring"]

                session = self._create_livy_session(connection_string, {"kind": kind}, self._ipython_display,
                                                    session_id, sql_context_created)

                # Do not start the session automatically. Just create it, but skip it if it does not exist.
                try:
                    # Get status to know if it's alive or not.
                    status = session.status
                    if not session.is_final_status(status):
                        self.logger.debug("Adding session {}".format(session_id))
                        client_obj = self._create_livy_client(session)
                        clients_to_return.append((name, client_obj))
                    else:
                        self.logger.error("Skipping serialized session '{}' because session was in status {}."
                                          .format(session.id, status))
                except (ValueError, ConnectionError) as e:
                    self.logger.error("Skipping serialized session '{}' because {}".format(session.id, str(e)))
        else:
            self.logger.debug("Empty manager state found.")

        return clients_to_return

    def serialize_state(self, name_client_dictionary):
        """Write every client's ``serialize()`` result, tagged with its name.

        Args:
            name_client_dictionary: Mapping of session name to client; each
                client's ``serialize()`` must return a mutable mapping.
        """
        self.logger.debug("Serializing state.")

        serialized_clients = []
        # Snapshot name/client pairs together so a name is never looked up
        # again after the snapshot was taken.
        for name, client in list(name_client_dictionary.items()):
            serialized_client = client.serialize()
            serialized_client["name"] = name
            serialized_clients.append(serialized_client)

        serialized_str = json.dumps({"clients": serialized_clients})
        self._reader_writer.overwrite_with_line(serialized_str)

    def _create_livy_session(self, connection_string, properties, ipython_display,
                             session_id, sql_context_created):
        """Build a session via ``LivySession.from_connection_string``."""
        return LivySession.from_connection_string(connection_string, properties, ipython_display,
                                                  session_id, sql_context_created)

    def _create_livy_client(self, session):
        """Wrap ``session`` in a new ``LivyClient``."""
        return LivyClient(session)
Exemplo n.º 35
0
class SparkKernelBase(IPythonKernel):
    """IPython kernel base that routes cell execution through the Spark magics.

    On construction (unless ``testing=True`` is passed) it loads the magics
    extension, switches the session language and optionally registers the
    DataFrame auto-visualization formatter.
    """

    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        """Set the kernel metadata and run the start-up cells.

        Args:
            implementation, implementation_version, language,
            language_version, language_info: Stored on the attributes of the
                same name.
            session_language: Language passed to the change-language magic.
            user_code_parser: Optional parser providing ``get_code_to_run``;
                a ``UserCodeParser`` is created when omitted.
            **kwargs: Forwarded to the parent constructor. ``testing=True``
                skips the start-up cells.
        """
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        # The template needs a placeholder; without one format() ignores its
        # argument and every kernel logs under the same language-less name.
        self.logger = Log("{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        """Execute a user cell, or re-report a queued fatal error instead.

        The work is run through ``wrap_unexpected_exceptions`` with
        ``_complete_cell`` passed as its second argument.
        """
        def f(self):
            if self._fatal_error is not None:
                return self._repeat_fatal_error()

            return self._do_execute(code, silent, store_history, user_expressions, allow_stdin)
        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        """Run the delete-session magic, then shut down the parent kernel."""
        # Cleanup
        self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions, allow_stdin):
        """Translate ``code`` with the user code parser and execute the result."""
        code_to_run = self.user_code_parser.get_code_to_run(code)

        res = self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

        return res

    def _load_magics_extension(self):
        """Load the ``remotespark.kernels`` extension; abort fatally on error."""
        register_magics_code = "%load_ext remotespark.kernels"
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _change_language(self):
        """Run the change-language magic for ``session_language``; abort
        fatally on error."""
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to change language to {}.".format(self.session_language))
        self.logger.debug("Changed language.")

    def _register_auto_viz(self):
        """Register ``display_dataframe`` as the display formatter for pandas
        DataFrames; abort fatally on error."""
        register_auto_viz_code = """from remotespark.datawidgets.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(register_auto_viz_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        """Run the delete-session magic silently, without storing history."""
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        """Execute ``code`` and optionally turn an error reply into a fatal error.

        Returns:
            The reply from the parent kernel, or the result of
            ``_abort_with_fatal_error`` when the reply status is ``"error"``,
            ``shutdown_if_error`` is set and ``log_if_error`` is given.
        """
        reply_content = self._execute_cell_for_user(code, silent, store_history, user_expressions, allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            # Without a log_if_error message the error reply is returned as-is.
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        """Execute ``code`` with the parent kernel's ``do_execute``."""
        return super(SparkKernelBase, self).do_execute(code, silent, store_history, user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        """Delegate to the parent kernel's ``do_shutdown``."""
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect. Call this and return the value it
        returns when there's some sort of error preventing the user's cell from executing; this
        will register the cell from the Jupyter UI as being completed."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        """Log ``message`` as an error and send it to the display."""
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()
Exemplo n.º 36
0
class SparkKernelBase(IPythonKernel):
    """IPython kernel base that routes cell execution through the Spark magics.

    On construction (unless ``testing=True`` is passed) it loads the magics
    extension, switches the session language and optionally registers the
    DataFrame auto-visualization formatter.
    """

    def __init__(self,
                 implementation,
                 implementation_version,
                 language,
                 language_version,
                 language_info,
                 session_language,
                 user_code_parser=None,
                 **kwargs):
        """Set the kernel metadata and run the start-up cells.

        Args:
            implementation, implementation_version, language,
            language_version, language_info: Stored on the attributes of the
                same name.
            session_language: Language passed to the change-language magic.
            user_code_parser: Optional parser providing ``get_code_to_run``;
                a ``UserCodeParser`` is created when omitted.
            **kwargs: Forwarded to the parent constructor. ``testing=True``
                skips the start-up cells.
        """
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        # The template needs a placeholder; without one format() ignores its
        # argument and every kernel logs under the same language-less name.
        self.logger = Log("{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self,
                   code,
                   silent,
                   store_history=True,
                   user_expressions=None,
                   allow_stdin=False):
        """Execute a user cell, or re-report a queued fatal error instead.

        The work is run through ``wrap_unexpected_exceptions`` with
        ``_complete_cell`` passed as its second argument.
        """
        def f(self):
            if self._fatal_error is not None:
                return self._repeat_fatal_error()

            return self._do_execute(code, silent, store_history,
                                    user_expressions, allow_stdin)

        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        """Run the delete-session magic, then shut down the parent kernel."""
        # Cleanup
        self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions,
                    allow_stdin):
        """Translate ``code`` with the user code parser and execute the result."""
        code_to_run = self.user_code_parser.get_code_to_run(code)

        res = self._execute_cell(code_to_run, silent, store_history,
                                 user_expressions, allow_stdin)

        return res

    def _load_magics_extension(self):
        """Load the ``remotespark.kernels`` extension; abort fatally on error."""
        register_magics_code = "%load_ext remotespark.kernels"
        self._execute_cell(
            register_magics_code,
            True,
            False,
            shutdown_if_error=True,
            log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _change_language(self):
        """Run the change-language magic for ``session_language``; abort
        fatally on error."""
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(
            self.session_language)
        self._execute_cell(
            register_magics_code,
            True,
            False,
            shutdown_if_error=True,
            log_if_error="Failed to change language to {}.".format(
                self.session_language))
        self.logger.debug("Changed language.")

    def _register_auto_viz(self):
        """Register ``display_dataframe`` as the display formatter for pandas
        DataFrames; abort fatally on error."""
        register_auto_viz_code = """from remotespark.datawidgets.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(
            register_auto_viz_code,
            True,
            False,
            shutdown_if_error=True,
            log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        """Run the delete-session magic silently, without storing history."""
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self,
                      code,
                      silent,
                      store_history=True,
                      user_expressions=None,
                      allow_stdin=False,
                      shutdown_if_error=False,
                      log_if_error=None):
        """Execute ``code`` and optionally turn an error reply into a fatal error.

        Returns:
            The reply from the parent kernel, or the result of
            ``_abort_with_fatal_error`` when the reply status is ``"error"``,
            ``shutdown_if_error`` is set and ``log_if_error`` is given.
        """
        reply_content = self._execute_cell_for_user(code, silent,
                                                    store_history,
                                                    user_expressions,
                                                    allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            # Without a log_if_error message the error reply is returned as-is.
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(
                    log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self,
                               code,
                               silent,
                               store_history=True,
                               user_expressions=None,
                               allow_stdin=False):
        """Execute ``code`` with the parent kernel's ``do_execute``."""
        return super(SparkKernelBase,
                     self).do_execute(code, silent, store_history,
                                      user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        """Delegate to the parent kernel's ``do_shutdown``."""
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect. Call this and return the value it
        returns when there's some sort of error preventing the user's cell from executing; this
        will register the cell from the Jupyter UI as being completed."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        """Log ``message`` as an error and send it to the display."""
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()
Exemplo n.º 37
0
    def __init__(self):
        """Create the logger and start with an empty session registry."""
        self.logger = Log("SessionManager")

        # No sessions are registered at construction time.
        self._sessions = {}
Exemplo n.º 38
0
class ReliableHttpClient(object):
    """Http client that is reliable in its requests. Uses requests library.

    Requests are retried according to the supplied retry policy until the
    response status is one of the accepted codes or the policy gives up.
    """
    def __init__(self, endpoint, headers, retry_policy):
        """Store the request settings.

        Args:
            endpoint: Object read for ``url``, ``authenticate``, ``username``
                and ``password``.
            headers: Headers sent with every request.
            retry_policy: Object providing ``should_retry(status, error,
                retry_count)`` and ``seconds_to_sleep(retry_count)``.
        """
        self._endpoint = endpoint
        self._headers = headers
        self._retry_policy = retry_policy
        self.logger = Log("ReliableHttpClient")

        self.verify_ssl = not conf.ignore_ssl_errors()
        if not self.verify_ssl:
            self.logger.debug(
                "ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks."
            )
            requests.packages.urllib3.disable_warnings()

    def compose_url(self, relative_url):
        """Join the endpoint url and ``relative_url`` with exactly one slash,
        dropping any leading/trailing slashes from ``relative_url``."""
        r_u = "/{}".format(relative_url.strip("/"))
        return self._endpoint.url + r_u

    def get(self, relative_url, accepted_status_codes):
        """Sends a get request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes,
                                  requests.get)

    def post(self, relative_url, accepted_status_codes, data):
        """Sends a post request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes,
                                  requests.post, data)

    def delete(self, relative_url, accepted_status_codes):
        """Sends a delete request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes,
                                  requests.delete)

    def _send_request(self,
                      relative_url,
                      accepted_status_codes,
                      function,
                      data=None):
        """Resolve ``relative_url`` against the endpoint and send the request
        with the retry counter starting at zero."""
        return self._send_request_helper(self.compose_url(relative_url),
                                         accepted_status_codes, function, data,
                                         0)

    def _request_kwargs(self, data):
        """Build the keyword arguments shared by every attempt of a request."""
        kwargs = {"headers": self._headers, "verify": self.verify_ssl}
        if self._endpoint.authenticate:
            kwargs["auth"] = (self._endpoint.username,
                              self._endpoint.password)
        if data is not None:
            kwargs["data"] = json.dumps(data)
        return kwargs

    def _send_request_helper(self, url, accepted_status_codes, function, data,
                             retry_count):
        """Call ``function(url, ...)`` until it yields an accepted status.

        Args:
            url: Absolute url to request.
            accepted_status_codes: Container of status codes treated as success.
            function: Callable with the ``requests.get``/``post``/``delete``
                calling convention.
            data: Optional payload; JSON-encoded and sent as ``data`` when not
                ``None``.
            retry_count: Number of retries already performed.

        Returns:
            The first response whose status code is accepted.

        Raises:
            HttpClientException: When a request fails or returns an unaccepted
                status and the retry policy declines to retry.
        """
        # Identical for every attempt, so computed once rather than spelled
        # out separately for each auth/data combination inside the loop.
        request_kwargs = self._request_kwargs(data)

        while True:
            try:
                r = function(url, **request_kwargs)
            except requests.exceptions.RequestException as e:
                error = True
                r = None
                status = None

                self.logger.error("Request to '{}' failed with '{}'".format(
                    url, e))
            else:
                error = False
                status = r.status_code

            if error or status not in accepted_status_codes:
                if self._retry_policy.should_retry(status, error, retry_count):
                    sleep(self._retry_policy.seconds_to_sleep(retry_count))
                    retry_count += 1
                    continue
                else:
                    raise HttpClientException(
                        "Invalid status code '{}' or error '{}' from {}".
                        format(status, error, url))
            return r
Exemplo n.º 39
0
class ClientManager(object):
    """Livy client manager.

    Keeps Livy clients in a dict keyed by session name. When a serializer is
    supplied, saved clients are loaded on construction and the current
    clients can be written back through it.
    """

    def __init__(self, serializer=None):
        """Create the manager and, if a serializer is given, restore its state.

        Args:
            serializer: Optional object exposing ``deserialize_state()``,
                iterated as ``(name, client)`` pairs, and
                ``serialize_state(clients)``. ``None`` disables persistence.
        """
        serialize_periodically = False
        serialize_period = 3

        # Serialization settings are only read when persistence is enabled.
        if serializer is not None:
            serialize_periodically = conf.serialize_periodically()
            serialize_period = conf.serialize_period_seconds()

        self.logger = Log("ClientManager")

        self._livy_clients = dict()
        self._serializer = serializer
        self._serialize_timer = None

        if self._serializer is not None:
            for (name, client) in self._serializer.deserialize_state():
                self.add_client(name, client)

            if serialize_periodically:
                self._serialize_state_periodically(serialize_period)

    def _serialize_state_periodically(self, serialize_period):
        """Start a timer that calls ``_serialize_state`` after ``serialize_period``.

        NOTE(review): if ``Timer`` is ``threading.Timer`` this fires once, not
        repeatedly, despite the method name -- confirm the intended behavior.
        """
        self.logger.debug("Starting state serialize timer.")

        self._serialize_timer = Timer(serialize_period, self._serialize_state)
        self._serialize_timer.start()

    def _serialize_state(self):
        """Hand the current name-to-client dict to the serializer."""
        self._serializer.serialize_state(self._livy_clients)

    @property
    def livy_clients(self):
        """The underlying name-to-client dict (not a copy)."""
        return self._livy_clients

    def get_sessions_list(self):
        """Return the registered session names as a new list."""
        return list(self._livy_clients)

    def get_sessions_info(self):
        """Return one ``"Name: <name>\\t<str(client)>"`` string per client."""
        return ["Name: {}\t{}".format(name, str(client)) for name, client in self._livy_clients.items()]

    def add_client(self, name, livy_client):
        """Register ``livy_client`` under ``name``.

        Raises:
            ValueError: If ``name`` is already registered.
        """
        # Membership is tested on the dict itself; no intermediate list needed.
        if name in self._livy_clients:
            raise ValueError("Session with name '{}' already exists. Please delete the session"
                             " first if you intend to replace it.".format(name))

        self._livy_clients[name] = livy_client

    def get_any_client(self):
        """Return the only registered client.

        Raises:
            AssertionError: If there are no clients, or more than one (the
                caller then has to choose by name).
        """
        number_of_sessions = len(self._livy_clients)
        if number_of_sessions == 0:
            raise AssertionError("You need to have at least 1 client created to execute commands.")
        if number_of_sessions > 1:
            raise AssertionError("Please specify the client to use. Possible sessions are {}".format(
                self.get_sessions_list()))
        return next(iter(self._livy_clients.values()))

    def get_client(self, name):
        """Return the client registered under ``name``.

        Raises:
            ValueError: If ``name`` is not registered.
        """
        if name not in self._livy_clients:
            raise ValueError("Could not find '{}' session in list of saved sessions. Possible sessions are {}".format(
                name, self.get_sessions_list()))
        return self._livy_clients[name]

    def get_session_id_for_client(self, name):
        """Return the ``session_id`` of the client named ``name``, or ``None``
        when no such client is registered."""
        if name not in self._livy_clients:
            return None
        return self._livy_clients[name].session_id

    def delete_client(self, name):
        """Close and unregister the client named ``name``.

        Raises:
            ValueError: If ``name`` is not registered.
        """
        self._remove_session(name)

    def clean_up_all(self):
        """Close and unregister every client, then serialize the (now empty)
        state if a serializer is configured."""
        # Iterate over a snapshot: _remove_session mutates the dict.
        for name in self.get_sessions_list():
            self._remove_session(name)

        if self._serializer is not None:
            self._serialize_state()

    def _remove_session(self, name):
        """Call ``close_session()`` on the named client and drop it.

        Raises:
            ValueError: If ``name`` is not registered.
        """
        if name not in self._livy_clients:
            raise ValueError("Could not find '{}' session in list of saved sessions. Possible sessions are {}"
                             .format(name, self.get_sessions_list()))

        # Close first; the entry is only dropped if closing did not raise.
        self._livy_clients[name].close_session()
        del self._livy_clients[name]
Exemplo n.º 40
0
class ClientManagerStateSerializer(object):
    """Livy client manager state serializer.

    Reads and writes the set of named clients as a single JSON document of
    the form ``{"clients": [...]}`` through a reader/writer object.
    """
    def __init__(self, reader_writer):
        """Store the reader/writer used for persistence.

        Args:
            reader_writer: Object providing ``read_lines()`` and
                ``overwrite_with_line(text)``. Must not be ``None``.
        """
        assert reader_writer is not None

        self.logger = Log("ClientManagerStateSerializer")
        self._ipython_display = IpythonDisplay()

        self._reader_writer = reader_writer

    def deserialize_state(self):
        """Rebuild clients from the stored state.

        Returns:
            A list of ``(name, client)`` tuples, one per stored session whose
            status is not final. Sessions in a final status, or whose status
            lookup raises ``ValueError``/``ConnectionError``, are logged and
            skipped. An empty list is returned when nothing is stored.
        """
        self.logger.debug("Deserializing state.")

        clients_to_return = []

        lines = self._reader_writer.read_lines()
        line = ''.join(lines).strip()

        if line != '':
            self.logger.debug("Read content. Converting to JSON.")
            state = json.loads(line)
            clients = state["clients"]

            for client in clients:
                # Ignore version for now
                name = client["name"]
                session_id = client["id"]
                sql_context_created = client["sqlcontext"]
                kind = client["kind"].lower()
                connection_string = client["connectionstring"]

                session = self._create_livy_session(connection_string,
                                                    {"kind": kind},
                                                    self._ipython_display,
                                                    session_id,
                                                    sql_context_created)

                # Do not start the session automatically. Just create it, but skip it if it does not exist.
                try:
                    # Get status to know if it's alive or not.
                    status = session.status
                    if not session.is_final_status(status):
                        self.logger.debug(
                            "Adding session {}".format(session_id))
                        client_obj = self._create_livy_client(session)
                        clients_to_return.append((name, client_obj))
                    else:
                        self.logger.error(
                            "Skipping serialized session '{}' because session was in status {}."
                            .format(session.id, status))
                except (ValueError, ConnectionError) as e:
                    self.logger.error(
                        "Skipping serialized session '{}' because {}".format(
                            session.id, str(e)))
        else:
            self.logger.debug("Empty manager state found.")

        return clients_to_return

    def serialize_state(self, name_client_dictionary):
        """Write every client's ``serialize()`` result, tagged with its name.

        Args:
            name_client_dictionary: Mapping of session name to client; each
                client's ``serialize()`` must return a mutable mapping.
        """
        self.logger.debug("Serializing state.")

        serialized_clients = []
        # Snapshot name/client pairs together so a name is never looked up
        # again after the snapshot was taken.
        for name, client in list(name_client_dictionary.items()):
            serialized_client = client.serialize()
            serialized_client["name"] = name
            serialized_clients.append(serialized_client)

        serialized_str = json.dumps({"clients": serialized_clients})
        self._reader_writer.overwrite_with_line(serialized_str)

    def _create_livy_session(self, connection_string, properties,
                             ipython_display, session_id, sql_context_created):
        """Build a session via ``LivySession.from_connection_string``."""
        return LivySession.from_connection_string(connection_string,
                                                  properties, ipython_display,
                                                  session_id,
                                                  sql_context_created)

    def _create_livy_client(self, session):
        """Wrap ``session`` in a new ``LivyClient``."""
        return LivyClient(session)
Exemplo n.º 41
0
class SparkController(object):
    """Coordinates Livy sessions.

    Named sessions are kept in a SessionManager. Endpoint-wide operations
    build LivySession objects on demand through the ``_http_client`` and
    ``_livy_session`` factory hooks.
    """

    def __init__(self, ipython_display):
        """Store the display object and create the logger and session manager."""
        self.logger = Log("SparkController")
        self.ipython_display = ipython_display
        self.session_manager = SessionManager()

    def get_logs(self, client_name=None):
        """Return ``get_logs()`` of the selected session.

        The session is chosen by ``get_session_by_name_or_default``.
        """
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_logs()

    def run_command(self, command, client_name=None):
        """Execute ``command`` on the selected session and return its result."""
        session_to_use = self.get_session_by_name_or_default(client_name)
        return command.execute(session_to_use)

    def run_sqlquery(self, sqlquery, client_name=None):
        """Execute ``sqlquery`` on the selected session and return its result."""
        session_to_use = self.get_session_by_name_or_default(client_name)
        return sqlquery.execute(session_to_use)

    def get_all_sessions_endpoint(self, endpoint):
        """Return one session object per session reported by ``endpoint``.

        Every returned session has had ``refresh_status()`` called on it.
        """
        http_client = self._http_client(endpoint)
        sessions = http_client.get_sessions()["sessions"]
        session_list = [
            self._livy_session(http_client, {"kind": s["kind"]},
                               self.ipython_display, s["id"]) for s in sessions
        ]
        for s in session_list:
            s.refresh_status()
        return session_list

    def get_all_sessions_endpoint_info(self, endpoint):
        """Return ``str()`` of every session found on ``endpoint``."""
        sessions = self.get_all_sessions_endpoint(endpoint)
        return [str(s) for s in sessions]

    def cleanup(self):
        """Ask the session manager to clean up all managed sessions."""
        self.session_manager.clean_up_all()

    def cleanup_endpoint(self, endpoint):
        """Delete every session reported by ``endpoint``."""
        for session in self.get_all_sessions_endpoint(endpoint):
            session.delete()

    def delete_session_by_name(self, name):
        """Delete the managed session registered under ``name``."""
        self.session_manager.delete_client(name)

    def delete_session_by_id(self, endpoint, session_id):
        """Delete the session with ``session_id`` on ``endpoint``.

        The session kind is read from the endpoint first, because a session
        object cannot be built without it.
        """
        # A single client serves both the lookup and the delete; building a
        # second client for the same endpoint was redundant.
        http_client = self._http_client(endpoint)
        response = http_client.get_session(session_id)
        session = self._livy_session(http_client, {"kind": response["kind"]},
                                     self.ipython_display, session_id, False)
        session.delete()

    def add_session(self, name, endpoint, skip_if_exists, properties):
        """Create, register and start a new session called ``name``.

        Parameters:
            name : key the session is registered under in the manager.
            endpoint : endpoint the session is created against.
            skip_if_exists : when true and ``name`` is already managed,
                nothing is created.
            properties : session properties passed to the session factory.
        """
        if skip_if_exists and (name
                               in self.session_manager.get_sessions_list()):
            self.logger.debug(
                "Skipping {} because it already exists in list of sessions.".
                format(name))
            return
        http_client = self._http_client(endpoint)
        session = self._livy_session(http_client, properties,
                                     self.ipython_display)
        # Registered before start(), so the manager can reject the name
        # before anything is started.
        self.session_manager.add_session(name, session)
        session.start()

    def get_session_id_for_client(self, name):
        """Return the session id the manager holds for ``name``."""
        return self.session_manager.get_session_id_for_client(name)

    def get_client_keys(self):
        """Return the manager's list of session names."""
        return self.session_manager.get_sessions_list()

    def get_manager_sessions_str(self):
        """Return the manager's description of its sessions."""
        return self.session_manager.get_sessions_info()

    def get_session_by_name_or_default(self, client_name):
        """Return the session named ``client_name`` (lower-cased).

        When ``client_name`` is None, return whichever session the manager
        hands out via ``get_any_session()``.
        """
        if client_name is None:
            return self.session_manager.get_any_session()
        else:
            client_name = client_name.lower()
            return self.session_manager.get_session(client_name)

    def get_managed_clients(self):
        """Return the manager's ``sessions`` attribute."""
        return self.session_manager.sessions

    @staticmethod
    def _livy_session(http_client,
                      properties,
                      ipython_display,
                      session_id=-1,
                      sql_created=None):
        """Factory hook: build a LivySession from the given arguments."""
        return LivySession(http_client, properties, ipython_display,
                           session_id, sql_created)

    @staticmethod
    def _http_client(endpoint):
        """Factory hook: build the HTTP client used to talk to ``endpoint``."""
        return LivyReliableHttpClient.from_endpoint(endpoint)
Exemplo n.º 42
0
 def __init__(self, ipython_display):
     """Keep the display object and create a logger and a SessionManager."""
     self.ipython_display = ipython_display
     self.logger = Log("SparkController")
     self.session_manager = SessionManager()
Exemplo n.º 43
0
 def __init__(self, session):
     """Bind the client to ``session`` and read the execute timeout from conf."""
     self._session = session
     self.logger = Log("LivyClient")
     self._execute_timeout_seconds = conf.execute_timeout_seconds()
Exemplo n.º 44
0
class SparkController(object):
    """Manages Livy clients and runs cells on them.

    Clients are kept in a ClientManager; sessions, HTTP clients and Livy
    clients are all obtained from a LivyClientFactory.
    """

    def __init__(self, ipython_display, serialize_path=None):
        """Create the controller.

        Parameters:
            ipython_display : display object handed to every created session.
            serialize_path : optional path; when given, the client manager is
                built with a state serializer backed by that path.
        """
        self.logger = Log("SparkController")
        self.ipython_display = ipython_display
        self.client_factory = LivyClientFactory()

        if serialize_path is None:
            self.client_manager = ClientManager()
            return

        reader_writer = FileSystemReaderWriter(serialize_path)
        state_serializer = ClientManagerStateSerializer(self.client_factory,
                                                        reader_writer)
        self.client_manager = ClientManager(state_serializer)

    def get_logs(self, client_name=None):
        """Return ``get_logs()`` of the selected client."""
        return self.get_client_by_name_or_default(client_name).get_logs()

    def run_cell(self, cell, client_name=None):
        """Run ``cell`` through the selected client's ``execute``."""
        return self.get_client_by_name_or_default(client_name).execute(cell)

    def run_cell_sql(self, cell, client_name=None):
        """Run ``cell`` through the selected client's ``execute_sql``."""
        return self.get_client_by_name_or_default(client_name).execute_sql(cell)

    def run_cell_hive(self, cell, client_name=None):
        """Run ``cell`` through the selected client's ``execute_hive``."""
        return self.get_client_by_name_or_default(client_name).execute_hive(cell)

    def get_all_sessions_endpoint(self, connection_string):
        """Return one status-refreshed session per session on the endpoint."""
        http_client = self.client_factory.create_http_client(connection_string)
        response = http_client.get("/sessions", [200])

        found = []
        for info in response.json()["sessions"]:
            found.append(self.client_factory.create_session(
                self.ipython_display, connection_string,
                {"kind": info["kind"]}, info["id"]))

        # Statuses are refreshed only after every session object exists.
        for session in found:
            session._refresh_status()
        return found

    def get_all_sessions_endpoint_info(self, connection_string):
        """Return ``str()`` of every session found on the endpoint."""
        return [str(session)
                for session in self.get_all_sessions_endpoint(connection_string)]

    def cleanup(self):
        """Ask the client manager to clean up all managed clients."""
        self.client_manager.clean_up_all()

    def cleanup_endpoint(self, connection_string):
        """Delete every session found on the endpoint."""
        for session in self.get_all_sessions_endpoint(connection_string):
            session.delete()

    def delete_session_by_name(self, name):
        """Delete the managed client registered under ``name``."""
        self.client_manager.delete_client(name)

    def delete_session_by_id(self, connection_string, session_id):
        """Delete session ``session_id`` on the endpoint; a 404 is a no-op."""
        http_client = self.client_factory.create_http_client(connection_string)
        response = http_client.get("/sessions/{}".format(session_id), [200, 404])
        if response.status_code == 404:
            return

        properties = {"kind": response.json()["kind"]}
        session = self.client_factory.create_session(
            self.ipython_display, connection_string, properties, session_id, False)
        session.delete()

    def add_session(self, name, connection_string, skip_if_exists, properties):
        """Start a new session and register a started client for it as ``name``.

        When ``skip_if_exists`` is true and ``name`` is already managed,
        nothing is created.
        """
        if skip_if_exists and name in self.client_manager.get_sessions_list():
            message = "Skipping {} because it already exists in list of sessions.".format(name)
            self.logger.debug(message)
            return

        new_session = self.client_factory.create_session(
            self.ipython_display, connection_string, properties, "-1", False)
        new_session.start()

        client = self.client_factory.build_client(new_session)
        self.client_manager.add_client(name, client)
        client.start()

    def get_client_keys(self):
        """Return the manager's list of session names."""
        return self.client_manager.get_sessions_list()

    def get_manager_sessions_str(self):
        """Return the manager's description of its sessions."""
        return self.client_manager.get_sessions_info()

    def get_client_by_name_or_default(self, client_name):
        """Return the client named ``client_name`` (lower-cased), or any
        client the manager chooses when no name is given."""
        if client_name is None:
            return self.client_manager.get_any_client()
        return self.client_manager.get_client(client_name.lower())
Exemplo n.º 45
0
class Command(ObjectWithGuid):
    """A piece of code that can be executed as a statement on a Livy session."""

    # Statement states in which Livy has not produced the final output yet.
    _PENDING_STATEMENT_STATES = ("waiting", "running", "cancelling")

    def __init__(self, code, spark_events=None):
        """Create the command.

        Parameters:
            code : source text to run; common leading indentation is removed.
            spark_events : event emitter; a new SparkEvents is created when
                None is passed.
        """
        super(Command, self).__init__()
        self.code = textwrap.dedent(code)
        self.logger = Log("Command")
        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

    def __eq__(self, other):
        """Commands are equal when their code is equal."""
        # Objects without a ``code`` attribute are simply not comparable;
        # returning NotImplemented makes ``==`` evaluate to False instead of
        # raising AttributeError.
        if not hasattr(other, "code"):
            return NotImplemented
        return self.code == other.code

    def __ne__(self, other):
        return not self == other

    def execute(self, session):
        """Run the code on ``session`` and return ``(success, output_text)``.

        A start event is emitted first and an end event afterwards, whether
        the execution succeeded or raised. Any exception is re-raised after
        the failure event has been emitted.
        """
        self._spark_events.emit_statement_execution_start_event(
            session.guid, session.kind, session.id, self.guid)
        # -1 is what gets reported if the statement was never created.
        statement_id = -1
        try:
            session.wait_for_idle()
            data = {"code": self.code}
            response = session.http_client.post_statement(session.id, data)
            statement_id = response['id']
            output = self._get_statement_output(session, statement_id)
        except Exception as e:
            self._spark_events.emit_statement_execution_end_event(
                session.guid, session.kind, session.id, self.guid,
                statement_id, False, e.__class__.__name__, str(e))
            raise
        else:
            self._spark_events.emit_statement_execution_end_event(
                session.guid, session.kind, session.id, self.guid,
                statement_id, True, "", "")
            return output

    def _get_statement_output(self, session, statement_id):
        """Poll the statement until it is no longer pending and return its result.

        Returns:
            ``(True, text)`` when the output status is "ok", or
            ``(False, evalue + "\\n" + traceback)`` when it is "error".

        Raises:
            LivyUnexpectedStatusException : when the statement carries no
                output or the output status is neither "ok" nor "error".
        """
        while True:
            statement = session.http_client.get_statement(
                session.id, statement_id)
            status = statement["state"]

            self.logger.debug("Status of statement {} is {}.".format(
                statement_id, status))

            # A queued ("waiting") or "cancelling" statement has no output
            # yet, exactly like a running one, so keep polling.
            if status in self._PENDING_STATEMENT_STATES:
                session.sleep()
                continue

            statement_output = statement["output"]
            if statement_output is None:
                raise LivyUnexpectedStatusException(
                    "Statement {} ended in state '{}' without any output from Livy."
                    .format(statement_id, status))

            if statement_output["status"] == "ok":
                return (True, statement_output["data"]["text/plain"])
            elif statement_output["status"] == "error":
                return (False, statement_output["evalue"] + "\n" +
                        "".join(statement_output["traceback"]))
            else:
                raise LivyUnexpectedStatusException(
                    "Unknown output status from Livy: '{}'".format(
                        statement_output["status"]))
Exemplo n.º 46
0
def test_log_init():
    """A freshly built Log exposes a standard ``logging.Logger``."""
    wrapped = Log('something').logger
    assert isinstance(wrapped, logging.Logger)
Exemplo n.º 47
0
 def __init__(self, ipython_display):
     """Keep the display object and create a logger and a SessionManager."""
     self.ipython_display = ipython_display
     self.logger = Log(u"SparkController")
     self.session_manager = SessionManager()
Exemplo n.º 48
0
class LivySession(object):
    """Session that is livy specific."""

    # Statement states in which Livy has not produced the final output yet.
    _PENDING_STATEMENT_STATES = ("waiting", "running", "cancelling")

    def __init__(self, ipython_display, http_client, session_id, sql_created, properties):
        """Create a session object; nothing is sent to the server here.

        Parameters:
            ipython_display : object used to write messages to the user.
            http_client : client used for every request to the server.
            session_id : id of an existing session, or "-1" for a session
                that has not been started yet.
            sql_created : whether the sql/hive contexts already exist.
            properties : session properties; must contain "kind".

        Raises:
            ValueError : when a sql state is given without a session id, or
                when the kind is not supported.
        """
        assert "kind" in properties.keys()
        kind = properties["kind"]
        self.properties = properties
        self.ipython_display = ipython_display

        status_sleep_seconds = conf.status_sleep_seconds()
        statement_sleep_seconds = conf.statement_sleep_seconds()
        create_sql_context_timeout_seconds = conf.create_sql_context_timeout_seconds()

        assert status_sleep_seconds > 0
        assert statement_sleep_seconds > 0
        assert create_sql_context_timeout_seconds > 0
        if session_id == "-1" and sql_created is True:
            raise ValueError("Cannot indicate sql state without session id.")

        self.logger = Log("LivySession")

        kind = kind.lower()
        if kind not in Constants.session_kinds_supported:
            raise ValueError("Session of kind '{}' not supported. Session must be of kinds {}."
                             .format(kind, ", ".join(Constants.session_kinds_supported)))

        if session_id == "-1":
            self._status = Constants.not_started_session_status
            sql_created = False
        else:
            # An existing session is assumed busy until its status is refreshed.
            self._status = Constants.busy_session_status

        self._logs = ""
        self._http_client = http_client
        self._status_sleep_seconds = status_sleep_seconds
        self._statement_sleep_seconds = statement_sleep_seconds
        self._create_sql_context_timeout_seconds = create_sql_context_timeout_seconds

        self._state = LivySessionState(session_id, http_client.connection_string,
                                       kind, sql_created)

    def __str__(self):
        return "Session id: {}\tKind: {}\tState: {}".format(self.id, self.kind, self._status)

    def get_state(self):
        """Return the LivySessionState object backing this session."""
        return self._state

    def start(self):
        """Start the session against actual livy server."""
        self.logger.debug("Starting '{}' session.".format(self.kind))

        r = self._http_client.post("/sessions", [201], self.properties)
        self._state.session_id = str(r.json()["id"])
        self._status = str(r.json()["state"])

        self.ipython_display.writeln("Creating SparkContext as 'sc'")
        self.logger.debug("Session '{}' started.".format(self.kind))

    def create_sql_context(self):
        """Create a sqlContext object on the session. Object will be accessible via variable 'sqlContext'."""
        if self.started_sql_context:
            return

        self.logger.debug("Starting '{}' sql and hive session.".format(self.kind))

        self.ipython_display.writeln("Creating SqlContext as 'sqlContext'")
        self._create_context(Constants.context_name_sql)

        self.ipython_display.writeln("Creating HiveContext as 'hiveContext'")
        self._create_context(Constants.context_name_hive)

        self._state.sql_context_created = True

    def _create_context(self, context_type):
        """Run the creation command for ``context_type`` once the session is idle.

        Raises:
            ValueError : for an unknown context type.
            LivyClientTimeoutError : when the session does not become idle
                within the configured timeout.
        """
        if context_type == Constants.context_name_sql:
            command = self._get_sql_context_creation_command()
        elif context_type == Constants.context_name_hive:
            command = self._get_hive_context_creation_command()
        else:
            raise ValueError("Cannot create context of type {}.".format(context_type))

        try:
            self.wait_for_idle(self._create_sql_context_timeout_seconds)
            self.execute(command)
            self.logger.debug("Started '{}' {} session.".format(self.kind, context_type))
        except LivyClientTimeoutError:
            raise LivyClientTimeoutError("Failed to create the {} context in time. Timed out after {} seconds."
                                         .format(context_type, self._create_sql_context_timeout_seconds))

    @property
    def id(self):
        return self._state.session_id

    @property
    def started_sql_context(self):
        return self._state.sql_context_created

    @property
    def kind(self):
        return self._state.kind

    @property
    def logs(self):
        # Logs are fetched from the server on every access.
        self._refresh_logs()
        return self._logs

    @property
    def http_client(self):
        return self._http_client

    @staticmethod
    def is_final_status(status):
        """Return whether ``status`` is one of the final session statuses."""
        return status in Constants.final_status

    def execute(self, commands):
        """Submit ``commands`` as one statement and return ``(success, text)``."""
        code = textwrap.dedent(commands)

        data = {"code": code}
        r = self._http_client.post(self._statements_url(), [201], data)
        statement_id = r.json()['id']

        return self._get_statement_output(statement_id)

    def delete(self):
        """Delete the session on the server and mark it dead locally.

        Raises:
            ValueError : when the session was never started or is already dead.
        """
        self.logger.debug("Deleting session '{}'".format(self.id))

        if self._status != Constants.not_started_session_status and self._status != Constants.dead_session_status:
            self._http_client.delete("/sessions/{}".format(self.id), [200, 404])
            self._status = Constants.dead_session_status
            self._state.session_id = "-1"
        else:
            raise ValueError("Cannot delete session {} that is in state '{}'."
                             .format(self.id, self._status))

    def wait_for_idle(self, seconds_to_wait):
        """Wait for session to go to idle status. Sleep meanwhile. Calls done every status_sleep_seconds as
        indicated by the constructor.

        Parameters:
            seconds_to_wait : number of seconds to wait before giving up.

        Raises:
            LivyUnexpectedStatusError : when the session reaches a final status.
            LivyClientTimeoutError : when the session is not idle in time.
        """
        # Polled in a loop rather than by recursion, so a long timeout with a
        # short sleep interval cannot exhaust the interpreter's recursion limit.
        remaining_seconds = seconds_to_wait
        while True:
            self._refresh_status()
            current_status = self._status
            if current_status == Constants.idle_session_status:
                return

            if current_status in Constants.final_status:
                error = "Session {} unexpectedly reached final status {}. See logs:\n{}"\
                    .format(self.id, current_status, self.logs)
                self.logger.error(error)
                raise LivyUnexpectedStatusError(error)

            if remaining_seconds <= 0.0:
                error = "Session {} did not reach idle status in time. Current status is {}."\
                    .format(self.id, current_status)
                self.logger.error(error)
                raise LivyClientTimeoutError(error)

            start_time = time()
            # Report the interval actually slept, not the remaining timeout.
            self.logger.debug("Session {} in state {}. Sleeping {} seconds."
                              .format(self.id, current_status, self._status_sleep_seconds))
            sleep(self._status_sleep_seconds)
            remaining_seconds -= (time() - start_time)

    def _statements_url(self):
        return "/sessions/{}/statements".format(self.id)

    def _refresh_status(self):
        """Fetch the session status from the server, store it and return it.

        Raises:
            ValueError : when the server reports a status this class does
                not know about.
        """
        status = self._get_latest_status()

        if status in Constants.possible_session_status:
            self._status = status
        else:
            raise ValueError("Status '{}' not supported by session.".format(status))

        return self._status

    def _refresh_logs(self):
        self._logs = self._get_latest_logs()

    def _get_latest_status(self):
        r = self._http_client.get("/sessions/{}".format(self.id), [200])
        session = r.json()

        return session['state']

    def _get_latest_logs(self):
        r = self._http_client.get("/sessions/{}/log?from=0".format(self.id), [200])
        log_array = r.json()['log']
        logs = "\n".join(log_array)

        return logs

    def _get_statement_output(self, statement_id):
        """Poll the statement until it is no longer pending and return its result.

        Returns:
            ``(True, text)`` when the output status is "ok", or
            ``(False, evalue + "\\n" + traceback)`` when it is "error".

        Raises:
            IndexError : when the statement id is not in the session's list.
            ValueError : when the statement carries no output or the output
                status is neither "ok" nor "error".
        """
        while True:
            r = self._http_client.get(self._statements_url(), [200])
            statement = [i for i in r.json()["statements"] if i["id"] == statement_id][0]
            status = statement["state"]

            self.logger.debug("Status of statement {} is {}.".format(statement_id, status))

            # A queued ("waiting") or "cancelling" statement has no output
            # yet, exactly like a running one, so keep polling.
            if status in self._PENDING_STATEMENT_STATES:
                sleep(self._statement_sleep_seconds)
                continue

            statement_output = statement["output"]
            if statement_output is None:
                raise ValueError("Statement {} ended in state '{}' without any output."
                                 .format(statement_id, status))

            if statement_output["status"] == "ok":
                return (True, statement_output["data"]["text/plain"])
            elif statement_output["status"] == "error":
                return (False, statement_output["evalue"] + "\n" +
                        "".join(statement_output["traceback"]))
            else:
                raise ValueError("Unknown output status: '{}'".format(statement_output["status"]))

    def _get_sql_context_creation_command(self):
        """Return the code that creates ``sqlContext`` for this session's kind."""
        if self.kind == Constants.session_kind_spark:
            sql_context_command = "val sqlContext = new org.apache.spark.sql.SQLContext(sc)\n" \
                                  "import sqlContext.implicits._"
        elif self.kind == Constants.session_kind_pyspark:
            sql_context_command = "from pyspark.sql import SQLContext\nfrom pyspark.sql.types import *\n" \
                                  "sqlContext = SQLContext(sc)"
        elif self.kind == Constants.session_kind_sparkr:
            sql_context_command = "sqlContext <- sparkRSQL.init(sc)"
        else:
            raise ValueError("Do not know how to create sqlContext in session of kind {}.".format(self.kind))

        return sql_context_command

    def _get_hive_context_creation_command(self):
        """Return the code that creates ``hiveContext`` for this session's kind."""
        if self.kind == Constants.session_kind_spark:
            hive_context_command = "val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)"
        elif self.kind == Constants.session_kind_pyspark:
            hive_context_command = "from pyspark.sql import HiveContext\nhiveContext = HiveContext(sc)"
        elif self.kind == Constants.session_kind_sparkr:
            hive_context_command = "hiveContext <- sparkRHive.init(sc)"
        else:
            raise ValueError("Do not know how to create hiveContext in session of kind {}.".format(self.kind))

        return hive_context_command
Exemplo n.º 49
0
class SparkController(object):
    """Coordinates Livy sessions.

    Named sessions are kept in a SessionManager. Endpoint-wide operations
    build LivySession objects on demand through the ``_http_client`` and
    ``_livy_session`` factory hooks.
    """

    def __init__(self, ipython_display):
        """Store the display object and create the logger and session manager."""
        self.logger = Log(u"SparkController")
        self.ipython_display = ipython_display
        self.session_manager = SessionManager()

    def get_app_id(self, client_name=None):
        """Return ``get_app_id()`` of the selected session."""
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_app_id()

    def get_driver_log_url(self, client_name=None):
        """Return ``get_driver_log_url()`` of the selected session."""
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_driver_log_url()

    def get_logs(self, client_name=None):
        """Return ``get_logs()`` of the selected session."""
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_logs()

    def get_spark_ui_url(self, client_name=None):
        """Return ``get_spark_ui_url()`` of the selected session."""
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_spark_ui_url()

    def run_command(self, command, client_name=None):
        """Execute ``command`` on the selected session and return its result."""
        session_to_use = self.get_session_by_name_or_default(client_name)
        return command.execute(session_to_use)

    def run_sqlquery(self, sqlquery, client_name=None):
        """Execute ``sqlquery`` on the selected session and return its result."""
        session_to_use = self.get_session_by_name_or_default(client_name)
        return sqlquery.execute(session_to_use)

    def get_all_sessions_endpoint(self, endpoint):
        """Return one session object per session reported by ``endpoint``.

        Every returned session has had ``refresh_status()`` called on it.
        """
        http_client = self._http_client(endpoint)
        sessions = http_client.get_sessions()[u"sessions"]
        session_list = [self._livy_session(http_client, {u"kind": s[u"kind"]},
                                           self.ipython_display, s[u"id"])
                        for s in sessions]
        for s in session_list:
            s.refresh_status()
        return session_list

    def get_all_sessions_endpoint_info(self, endpoint):
        """Return ``str()`` of every session found on ``endpoint``."""
        sessions = self.get_all_sessions_endpoint(endpoint)
        return [str(s) for s in sessions]

    def cleanup(self):
        """Ask the session manager to clean up all managed sessions."""
        self.session_manager.clean_up_all()

    def cleanup_endpoint(self, endpoint):
        """Delete every session reported by ``endpoint``."""
        for session in self.get_all_sessions_endpoint(endpoint):
            session.delete()

    def delete_session_by_name(self, name):
        """Delete the managed session registered under ``name``."""
        self.session_manager.delete_client(name)

    def delete_session_by_id(self, endpoint, session_id):
        """Delete the session with ``session_id`` on ``endpoint``.

        The session kind is read from the endpoint first, because a session
        object cannot be built without it.
        """
        # A single client serves both the lookup and the delete; building a
        # second client for the same endpoint was redundant.
        http_client = self._http_client(endpoint)
        response = http_client.get_session(session_id)
        session = self._livy_session(http_client, {u"kind": response[u"kind"]},
                                     self.ipython_display, session_id, False)
        session.delete()

    def add_session(self, name, endpoint, skip_if_exists, properties):
        """Create, register and start a new session called ``name``.

        Parameters:
            name : key the session is registered under in the manager.
            endpoint : endpoint the session is created against.
            skip_if_exists : when true and ``name`` is already managed,
                nothing is created.
            properties : session properties passed to the session factory.
        """
        if skip_if_exists and (name in self.session_manager.get_sessions_list()):
            self.logger.debug(u"Skipping {} because it already exists in list of sessions.".format(name))
            return
        http_client = self._http_client(endpoint)
        session = self._livy_session(http_client, properties, self.ipython_display)
        # Registered before start(), so the manager can reject the name
        # before anything is started.
        self.session_manager.add_session(name, session)
        session.start()

    def get_session_id_for_client(self, name):
        """Return the session id the manager holds for ``name``."""
        return self.session_manager.get_session_id_for_client(name)

    def get_client_keys(self):
        """Return the manager's list of session names."""
        return self.session_manager.get_sessions_list()

    def get_manager_sessions_str(self):
        """Return the manager's description of its sessions."""
        return self.session_manager.get_sessions_info()

    def get_session_by_name_or_default(self, client_name):
        """Return the session named ``client_name`` (lower-cased).

        When ``client_name`` is None, return whichever session the manager
        hands out via ``get_any_session()``.
        """
        if client_name is None:
            return self.session_manager.get_any_session()
        else:
            client_name = client_name.lower()
            return self.session_manager.get_session(client_name)

    def get_managed_clients(self):
        """Return the manager's ``sessions`` attribute."""
        return self.session_manager.sessions

    @staticmethod
    def _livy_session(http_client, properties, ipython_display,
                      session_id=-1, sql_created=None):
        """Factory hook: build a LivySession from the given arguments."""
        return LivySession(http_client, properties, ipython_display,
                           session_id, sql_created)

    @staticmethod
    def _http_client(endpoint):
        """Factory hook: build the HTTP client used to talk to ``endpoint``."""
        return LivyReliableHttpClient.from_endpoint(endpoint)
Exemplo n.º 50
0
 def __init__(self):
     """Create the logger, tagged "EventsHandler", that this object logs through."""
     self.logger = Log("EventsHandler")
Exemplo n.º 51
0
class SparkKernelBase(IPythonKernel):
    """IPython kernel base that turns user cells into Spark magic invocations.

    Each cell is parsed into a subcommand, handed to a matching transformer,
    and the code the transformer returns is executed by the underlying
    IPython kernel. Session start and deletion are driven by the begin/end
    actions the transformer returns.
    """

    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 kernel_conf_name, session_language, client_name, **kwargs):
        """Initialise the kernel.

        Unless ``testing=True`` is passed in ``kwargs``, the credentials are
        read from configuration, the magics extension is loaded and auto viz
        is registered when enabled. When the configuration is missing, a
        fatal error is queued and initialisation stops early.
        """
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.kernel_conf_name = kernel_conf_name
        self.session_language = session_language
        self.client_name = client_name

        super(SparkKernelBase, self).__init__(**kwargs)

        self._logger = Log(self.client_name)
        self._session_started = False
        self._fatal_error = None
        self._ipython_display = IpythonDisplay()

        self.user_command_parser = UserCommandParser()

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            configuration = self._get_configuration()
            if not configuration:
                # _get_configuration() sets the error for us so we can just return now.
                # The kernel is not in a good state and all do_execute calls will
                # fail with the fatal error.
                return
            (username, password, url) = configuration
            self.connection_string = get_connection_string(url, username, password)
            self._load_magics_extension()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        """Execute one user cell and return the IPython reply content.

        Raises:
            ValueError : when a fatal error is queued, or when the
                transformer returns an unknown begin or end action.
        """
        if self._fatal_error is not None:
            self._repeat_fatal_error()

        # Parse command
        subcommand, force, output_var, command = self.user_command_parser.parse_user_command(code)

        # Get transformer
        transformer = self._get_code_transformer(subcommand)

        # Get instructions
        try:
            code_to_run, error_to_show, begin_action, end_action, deletes_session = \
                transformer.get_code_to_execute(self._session_started, self.connection_string,
                                                force, output_var, command)
        except SyntaxError as se:
            self._show_user_error("{}".format(se))
        else:
            # Execute instructions
            if error_to_show is not None:
                # The error is displayed and the returned code is still run,
                # skipping the begin/end actions.
                self._show_user_error(error_to_show)
                return self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

            if begin_action == Constants.delete_session_action:
                self._delete_session()
            elif begin_action == Constants.start_session_action:
                self._start_session()
            elif begin_action == Constants.do_nothing_action:
                pass
            else:
                raise ValueError("Begin action {} not supported.".format(begin_action))

            res = self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

            if end_action == Constants.delete_session_action:
                self._delete_session()
            elif end_action == Constants.start_session_action:
                self._start_session()
            elif end_action == Constants.do_nothing_action:
                pass
            else:
                raise ValueError("End action {} not supported.".format(end_action))

            if deletes_session:
                self._session_started = False

            return res

        # Only reached after a SyntaxError: run an empty cell so that a
        # regular reply is still produced.
        return self._execute_cell("", silent, store_history, user_expressions, allow_stdin)

    def do_shutdown(self, restart):
        """Delete the session (if one was started), then shut the kernel down."""
        # Cleanup
        self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    @staticmethod
    def _get_code_transformer(subcommand):
        """Return the transformer for ``subcommand``; unknown subcommands get
        a NotSupportedTransformer."""
        if subcommand == UserCommandParser.run_command:
            return SparkTransformer(subcommand)
        elif subcommand == UserCommandParser.sql_command:
            return SqlTransformer(subcommand)
        elif subcommand == UserCommandParser.hive_command:
            return HiveTransformer(subcommand)
        elif subcommand == UserCommandParser.config_command:
            return ConfigTransformer(subcommand)
        elif subcommand == UserCommandParser.info_command:
            return InfoTransformer(subcommand)
        elif subcommand == UserCommandParser.delete_command:
            return DeleteSessionTransformer(subcommand)
        elif subcommand == UserCommandParser.clean_up_command:
            return CleanUpTransformer(subcommand)
        elif subcommand == UserCommandParser.logs_command:
            return LogsTransformer(subcommand)
        elif subcommand == UserCommandParser.local_command:
            return PythonTransformer(subcommand)
        else:
            return NotSupportedTransformer(subcommand)

    def _load_magics_extension(self):
        """Load the remotespark magics; a failure aborts with a fatal error."""
        register_magics_code = "%load_ext remotespark"
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark magics library.")
        self._logger.debug("Loaded magics.")

    def _register_auto_viz(self):
        """Register the dataframe display formatter; a failure aborts with a fatal error."""
        register_auto_viz_code = """from remotespark.datawidgets.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(register_auto_viz_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self._logger.debug("Registered auto viz.")

    def _start_session(self):
        """Add the Livy session through the ``%spark add`` magic, once."""
        if not self._session_started:
            # The flag is set before the magic runs.
            self._session_started = True

            add_session_code = "%spark add {} {} {} skip".format(
                self.client_name, self.session_language, self.connection_string)
            self._execute_cell(add_session_code, True, False, shutdown_if_error=True,
                               log_if_error="Failed to create a Livy session.")
            self._logger.debug("Added session.")

    def _delete_session(self):
        """Run ``%spark cleanup`` when a session was started and clear the flag."""
        if self._session_started:
            code = "%spark cleanup"
            self._execute_cell_for_user(code, True, False)
            self._session_started = False

    def _get_configuration(self):
        """Returns (username, password, url). If there is an error (missing configuration),
           returns False."""
        try:
            credentials = getattr(conf, 'kernel_' + self.kernel_conf_name + '_credentials')()
            ret = (credentials['username'], credentials['password'], credentials['url'])
        except (KeyError, AssertionError):
            ret = None

        # The URL has to be set in the configuration. This is checked
        # explicitly instead of with ``assert`` so that the validation is not
        # stripped when Python runs with -O.
        if ret is None or not ret[2]:
            message = "Please set configuration for 'kernel_{}_credentials' to initialize Kernel".format(
                self.kernel_conf_name)
            self._queue_fatal_error(message)
            return False

        return ret

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        """Execute ``code`` and return the reply content.

        When ``shutdown_if_error`` is set, the reply status is "error" and
        ``log_if_error`` is given, the kernel aborts with a fatal error built
        from ``log_if_error`` and the reply's error value.
        NOTE: without ``log_if_error`` the error reply is returned as-is,
        even when ``shutdown_if_error`` is set.
        """
        reply_content = self._execute_cell_for_user(code, silent, store_history, user_expressions, allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(log_if_error, error_from_reply)
                self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        """Run ``code`` through the parent kernel's ``do_execute``."""
        return super(SparkKernelBase, self).do_execute(code, silent, store_history, user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        """Run the parent kernel's ``do_shutdown``."""
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _show_user_error(self, message):
        """Log ``message`` as an error and send it to the display."""
        self._logger.error(message)
        self._ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self._logger.error(error)
        self._ipython_display.send_error(error)
        raise ValueError(self._fatal_error)
Exemplo n.º 52
0
class LivySession(ObjectWithGuid):
    """A Livy session identified by id and kind, driven through an HTTP client.

    Session lifecycle events (creation and deletion) are reported through the
    ``SparkEvents`` object given at construction time.
    """

    def __init__(self, http_client, properties, ipython_display,
                 session_id=-1, sql_created=None, spark_events=None):
        """Set up the session object without contacting the server.

        Parameters:
            http_client : client used for every request about this session.
            properties : session properties; must contain a "kind" entry.
            ipython_display : display used for user-facing messages.
            session_id : id of an existing session, or -1 for a new one.
            sql_created : whether the SQL context already exists.
            spark_events : event emitter; a SparkEvents() is created if None.

        Raises BadUserDataException when the SQL state is given without a
        session id, or when the session kind is not supported.
        """
        super(LivySession, self).__init__()
        assert "kind" in properties.keys()
        requested_kind = properties["kind"]
        self.properties = properties
        self.ipython_display = ipython_display
        self._spark_events = SparkEvents() if spark_events is None else spark_events

        # Timing settings come from configuration and must all be positive.
        status_interval = conf.status_sleep_seconds()
        statement_interval = conf.statement_sleep_seconds()
        idle_timeout = conf.wait_for_idle_timeout_seconds()
        assert status_interval > 0
        assert statement_interval > 0
        assert idle_timeout > 0

        is_new_session = session_id == -1
        if is_new_session and sql_created is True:
            raise BadUserDataException("Cannot indicate sql state without session id.")

        self.logger = Log("LivySession")

        normalized_kind = requested_kind.lower()
        if normalized_kind not in constants.SESSION_KINDS_SUPPORTED:
            supported = ", ".join(constants.SESSION_KINDS_SUPPORTED)
            raise BadUserDataException("Session of kind '{}' not supported. Session must be of kinds {}."
                                       .format(normalized_kind, supported))

        if is_new_session:
            # A session that has not been started cannot have a SQL context yet.
            self.status = constants.NOT_STARTED_SESSION_STATUS
            sql_created = False
        else:
            self.status = constants.BUSY_SESSION_STATUS

        self._logs = ""
        self._http_client = http_client
        self._status_sleep_seconds = status_interval
        self._statement_sleep_seconds = statement_interval
        self._wait_for_idle_timeout_seconds = idle_timeout

        self.kind = normalized_kind
        self.id = session_id
        self.created_sql_context = sql_created

    def __str__(self):
        """Return a one-line summary with the session id, kind and status."""
        summary = "Session id: {}\tKind: {}\tState: {}"
        return summary.format(self.id, self.kind, self.status)

    def start(self, create_sql_context=True):
        """Start the session against actual livy server.

        Posts the session, waits for it to become idle and, if requested,
        creates the SQL context. A creation-end event is emitted on both
        success and failure; failures are re-raised after being reported.
        """
        events = self._spark_events
        events.emit_session_creation_start_event(self.guid, self.kind)

        try:
            response = self._http_client.post_session(self.properties)
            self.id = response["id"]
            self.status = str(response["state"])

            self.ipython_display.writeln("Creating SparkContext as 'sc'")
            # The session gets livy_session_startup_timeout_seconds() to start up.
            try:
                self.wait_for_idle(conf.livy_session_startup_timeout_seconds())
            except LivyClientTimeoutException:
                raise LivyClientTimeoutException("Session {} did not start up in {} seconds."
                                                 .format(self.id, conf.livy_session_startup_timeout_seconds()))

            if create_sql_context:
                self.create_sql_context()
        except Exception as failure:
            events.emit_session_creation_end_event(self.guid, self.kind, self.id, self.status,
                                                   False, failure.__class__.__name__, str(failure))
            raise

        # Only reached when nothing above raised.
        events.emit_session_creation_end_event(self.guid, self.kind, self.id, self.status, True, "", "")

    def create_sql_context(self):
        """Create a sqlContext object on the session, reachable as variable 'sqlContext'.

        Does nothing when the context was already created. Raises
        LivyClientTimeoutException when the command times out and
        FailedToCreateSqlContextException when the command reports failure.
        """
        if self.created_sql_context:
            return

        self.logger.debug("Starting '{}' hive session.".format(self.kind))
        self.ipython_display.writeln("Creating HiveContext as 'sqlContext'")
        creation_command = self._get_sql_context_creation_command()
        try:
            succeeded, output = creation_command.execute(self)
        except LivyClientTimeoutException:
            raise LivyClientTimeoutException("Failed to create the SqlContext in time. Timed out after {} seconds."
                                             .format(self._wait_for_idle_timeout_seconds))

        if not succeeded:
            raise FailedToCreateSqlContextException("Failed to create the SqlContext.\nError: '{}'".format(output))
        self.created_sql_context = True

    def get_logs(self):
        """Fetch all session log lines, cache them joined by newlines and return them."""
        log_lines = self._http_client.get_all_session_logs(self.id)['log']
        self._logs = "\n".join(log_lines)
        return self._logs

    @property
    def http_client(self):
        """The HTTP client this session talks through."""
        return self._http_client

    @staticmethod
    def is_final_status(status):
        """Tell whether ``status`` is one of constants.FINAL_STATUS."""
        return status in constants.FINAL_STATUS

    def delete(self):
        """Delete the session on the server and mark it dead.

        A session that was never started or is already dead is not deleted;
        an error is sent to the display instead. Deletion start/end events are
        emitted, and any exception is reported in the end event and re-raised.
        """
        # Captured up front because self.id is reset to -1 on success.
        session_id = self.id
        events = self._spark_events
        events.emit_session_deletion_start_event(self.guid, self.kind, session_id, self.status)

        try:
            self.logger.debug("Deleting session '{}'".format(session_id))

            if self.status in (constants.NOT_STARTED_SESSION_STATUS, constants.DEAD_SESSION_STATUS):
                self.ipython_display.send_error("Cannot delete session {} that is in state '{}'."
                                                .format(session_id, self.status))
            else:
                self._http_client.delete_session(session_id)
                self.status = constants.DEAD_SESSION_STATUS
                self.id = -1
        except Exception as failure:
            events.emit_session_deletion_end_event(self.guid, self.kind, session_id, self.status, False,
                                                   failure.__class__.__name__, str(failure))
            raise

        # Only reached when nothing above raised.
        events.emit_session_deletion_end_event(self.guid, self.kind, session_id, self.status, True, "", "")

    def wait_for_idle(self, seconds_to_wait=None):
        """Block until the session is idle, polling every status_sleep_seconds.

        Parameters:
            seconds_to_wait : number of seconds to wait before giving up;
                defaults to the configured wait-for-idle timeout.

        Raises LivyUnexpectedStatusException when the session reaches a final
        status and LivyClientTimeoutException when the time budget runs out.
        """
        remaining = self._wait_for_idle_timeout_seconds if seconds_to_wait is None else seconds_to_wait

        while True:
            self.refresh_status()
            if self.status == constants.IDLE_SESSION_STATUS:
                return

            if self.status in constants.FINAL_STATUS:
                problem = "Session {} unexpectedly reached final status '{}'."\
                    .format(self.id, self.status)
                self.logger.error(problem)
                raise LivyUnexpectedStatusException('{} See logs:\n{}'.format(problem, self.get_logs()))

            if remaining <= 0.0:
                problem = "Session {} did not reach idle status in time. Current status is {}."\
                    .format(self.id, self.status)
                self.logger.error(problem)
                raise LivyClientTimeoutException(problem)

            # Charge the measured duration of the pause against the budget.
            pause_started = time()
            self.logger.debug("Session {} in state {}. Sleeping {} seconds."
                              .format(self.id, self.status, self._status_sleep_seconds))
            sleep(self._status_sleep_seconds)
            remaining = remaining - (time() - pause_started)

    def sleep(self):
        """Pause for the configured statement sleep interval."""
        sleep(self._statement_sleep_seconds)

    def refresh_status(self):
        """Read the session state from the server, store it and return it.

        Raises LivyUnexpectedStatusException for a state that is not in
        constants.POSSIBLE_SESSION_STATUS.
        """
        reported = self._http_client.get_session(self.id)['state']

        if reported not in constants.POSSIBLE_SESSION_STATUS:
            raise LivyUnexpectedStatusException("Status '{}' not supported by session.".format(reported))

        self.status = reported
        return self.status

    def _get_sql_context_creation_command(self):
        """Build the Command that creates the HiveContext for this session's kind.

        Raises BadUserDataException for a kind without a known command.
        """
        code_by_kind = (
            (constants.SESSION_KIND_SPARK,
             "val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)"),
            (constants.SESSION_KIND_PYSPARK,
             "from pyspark.sql import HiveContext\nsqlContext = HiveContext(sc)"),
            (constants.SESSION_KIND_SPARKR,
             "sqlContext <- sparkRHive.init(sc)"),
        )
        for known_kind, code in code_by_kind:
            if self.kind == known_kind:
                return Command(code)

        raise BadUserDataException("Do not know how to create HiveContext in session of kind {}.".format(self.kind))
Exemplo n.º 53
0
class LivySession(object):
    """Session that is livy specific.

    The session id, kind and SQL-context flag are kept in a LivySessionState
    (see ``get_state``); the current status is kept in ``self.status``.
    """

    def __init__(self, http_client, properties, ipython_display,
                 session_id="-1", sql_created=None):
        """Set up the session object without contacting the server.

        Parameters:
            http_client : client used for every request about this session.
            properties : session properties; must contain a "kind" entry.
            ipython_display : display used for user-facing messages.
            session_id : id of an existing session, or "-1" for a new one.
            sql_created : whether the SQL context already exists.

        Raises ValueError when the SQL state is given without a session id, or
        when the session kind is not supported.
        """
        assert "kind" in properties
        kind = properties["kind"]
        self.properties = properties
        self.ipython_display = ipython_display

        status_sleep_seconds = conf.status_sleep_seconds()
        statement_sleep_seconds = conf.statement_sleep_seconds()
        create_sql_context_timeout_seconds = conf.create_sql_context_timeout_seconds()

        assert status_sleep_seconds > 0
        assert statement_sleep_seconds > 0
        assert create_sql_context_timeout_seconds > 0
        if session_id == "-1" and sql_created is True:
            raise ValueError("Cannot indicate sql state without session id.")

        self.logger = Log("LivySession")

        kind = kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            raise ValueError("Session of kind '{}' not supported. Session must be of kinds {}."
                             .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

        if session_id == "-1":
            # A session that has not been started cannot have a SQL context yet.
            self.status = constants.NOT_STARTED_SESSION_STATUS
            sql_created = False
        else:
            self.status = constants.BUSY_SESSION_STATUS

        self._logs = ""
        self._http_client = http_client
        self._status_sleep_seconds = status_sleep_seconds
        self._statement_sleep_seconds = statement_sleep_seconds
        self._create_sql_context_timeout_seconds = create_sql_context_timeout_seconds

        self._state = LivySessionState(session_id, self._http_client.connection_string,
                                       kind, sql_created)

    @staticmethod
    def from_connection_string(connection_string, properties, ipython_display,
                               session_id="-1", sql_created=None):
        """Build a LivySession whose HTTP client is created from ``connection_string``."""
        http_client = LivyReliableHttpClient.from_connection_string(connection_string)
        return LivySession(http_client, properties, ipython_display, session_id, sql_created)

    def __str__(self):
        """Return a one-line summary with the session id, kind and status."""
        return "Session id: {}\tKind: {}\tState: {}".format(self.id, self.kind, self.status)

    def get_state(self):
        """Return the LivySessionState object backing this session."""
        return self._state

    def start(self):
        """Start the session against actual livy server."""
        self.logger.debug("Starting '{}' session.".format(self.kind))

        r = self._http_client.post("/sessions", [201], self.properties)
        self._state.session_id = str(r.json()["id"])
        self.status = str(r.json()["state"])

        self.ipython_display.writeln("Creating SparkContext as 'sc'")
        self.logger.debug("Session '{}' started.".format(self.kind))

    def create_sql_context(self):
        """Create a sqlContext object on the session. Object will be accessible via variable 'sqlContext'."""
        if self.started_sql_context:
            return
        self.logger.debug("Starting '{}' hive session.".format(self.kind))
        self.ipython_display.writeln("Creating HiveContext as 'sqlContext'")
        self._create_context(constants.CONTEXT_NAME_SQL)
        self._state.sql_context_created = True

    def _create_context(self, context_type):
        """Wait for the session to be idle, then run the creation command for ``context_type``.

        Raises ValueError for an unknown context type and LivyClientTimeoutError
        when the session does not become idle in time.
        """
        if context_type == constants.CONTEXT_NAME_SQL:
            command = self._get_sql_context_creation_command()
        else:
            raise ValueError("Cannot create context of type {}.".format(context_type))

        try:
            self.wait_for_idle(self._create_sql_context_timeout_seconds)
            self.execute(command)
            self.logger.debug("Started '{}' {} session.".format(self.kind, context_type))
        except LivyClientTimeoutError:
            raise LivyClientTimeoutError("Failed to create the {} context in time. Timed out after {} seconds."
                                         .format(context_type, self._create_sql_context_timeout_seconds))

    def get_logs(self):
        """Fetch the session log from the server, cache it joined by newlines and return it."""
        r = self._http_client.get("/sessions/{}/log?from=0".format(self.id), [200])
        log_array = r.json()['log']
        self._logs = "\n".join(log_array)
        return self._logs

    @property
    def id(self):
        """Session id, as stored in the session state."""
        return self._state.session_id

    @property
    def started_sql_context(self):
        """Whether the session state records the SQL context as created."""
        return self._state.sql_context_created

    @property
    def kind(self):
        """Session kind, as stored in the session state."""
        return self._state.kind

    @property
    def http_client(self):
        """The HTTP client this session talks through."""
        return self._http_client

    @staticmethod
    def is_final_status(status):
        """Tell whether ``status`` is one of constants.FINAL_STATUS."""
        return status in constants.FINAL_STATUS

    def execute(self, commands):
        """Submit ``commands`` (dedented) as a statement and return its output.

        Returns the (success, text) tuple produced by ``_get_statement_output``.
        """
        code = textwrap.dedent(commands)

        data = {"code": code}
        r = self._http_client.post(self._statements_url(), [201], data)
        statement_id = r.json()['id']

        return self._get_statement_output(statement_id)

    def delete(self):
        """Delete the session on the server and mark it dead.

        Raises ValueError when the session was never started or is already dead.
        """
        self.logger.debug("Deleting session '{}'".format(self.id))

        if self.status != constants.NOT_STARTED_SESSION_STATUS and self.status != constants.DEAD_SESSION_STATUS:
            self._http_client.delete("/sessions/{}".format(self.id), [200, 404])
            self.status = constants.DEAD_SESSION_STATUS
            self._state.session_id = "-1"
        else:
            raise ValueError("Cannot delete session {} that is in state '{}'."
                             .format(self.id, self.status))

    def wait_for_idle(self, seconds_to_wait):
        """Wait for session to go to idle status. Sleep meanwhile. Calls done every status_sleep_seconds as
        indicated by the constructor.

        Parameters:
            seconds_to_wait : number of seconds to wait before giving up.

        Raises LivyUnexpectedStatusError when the session reaches a final
        status and LivyClientTimeoutError when the time budget runs out.
        """
        # Polling is done in a loop rather than by recursion so that a long wait
        # with a short poll interval cannot exhaust the interpreter call stack.
        while True:
            self._refresh_status()
            current_status = self.status
            if current_status == constants.IDLE_SESSION_STATUS:
                return

            if current_status in constants.FINAL_STATUS:
                error = "Session {} unexpectedly reached final status '{}'. See logs:\n{}"\
                    .format(self.id, current_status, self.get_logs())
                self.logger.error(error)
                raise LivyUnexpectedStatusError(error)

            if seconds_to_wait <= 0.0:
                error = "Session {} did not reach idle status in time. Current status is {}."\
                    .format(self.id, current_status)
                self.logger.error(error)
                raise LivyClientTimeoutError(error)

            start_time = time()
            # Report the actual pause length, not the remaining time budget.
            self.logger.debug("Session {} in state {}. Sleeping {} seconds."
                              .format(self.id, current_status, self._status_sleep_seconds))
            sleep(self._status_sleep_seconds)
            seconds_to_wait -= time() - start_time

    def _statements_url(self):
        """Return the statements URL path for this session."""
        return "/sessions/{}/statements".format(self.id)

    def _refresh_status(self):
        """Read the session state from the server, store it and return it.

        Raises ValueError for a state that is not in constants.POSSIBLE_SESSION_STATUS.
        """
        status = self._http_client.get("/sessions/{}".format(self.id), [200]).json()['state']

        if status in constants.POSSIBLE_SESSION_STATUS:
            self.status = status
        else:
            raise ValueError("Status '{}' not supported by session.".format(status))

        return self.status

    def _get_statement_output(self, statement_id):
        """Poll the statement until it is no longer running and return its output.

        Returns (True, text) when the output status is "ok" and
        (False, error value plus traceback) when it is "error".
        Raises ValueError for any other output status.
        """
        while True:
            r = self._http_client.get(self._statements_url(), [200])
            statement = [i for i in r.json()["statements"] if i["id"] == statement_id][0]
            status = statement["state"]

            self.logger.debug("Status of statement {} is {}.".format(statement_id, status))

            if status == "running":
                sleep(self._statement_sleep_seconds)
                continue

            statement_output = statement["output"]
            if statement_output["status"] == "ok":
                return (True, statement_output["data"]["text/plain"])
            if statement_output["status"] == "error":
                return (False, statement_output["evalue"] + "\n" +
                        "".join(statement_output["traceback"]))
            raise ValueError("Unknown output status: '{}'".format(statement_output["status"]))

    def _get_sql_context_creation_command(self):
        """Return the code that creates the HiveContext for this session's kind.

        Raises ValueError for a kind without a known command.
        """
        if self.kind == constants.SESSION_KIND_SPARK:
            sql_context_command = "val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)"
        elif self.kind == constants.SESSION_KIND_PYSPARK:
            sql_context_command = "from pyspark.sql import HiveContext\nsqlContext = HiveContext(sc)"
        elif self.kind == constants.SESSION_KIND_SPARKR:
            sql_context_command = "sqlContext <- sparkRHive.init(sc)"
        else:
            raise ValueError("Do not know how to create HiveContext in session of kind {}.".format(self.kind))

        return sql_context_command
Exemplo n.º 54
0
class LivySession(ObjectWithGuid):
    """Session that is livy specific."""

    def __init__(self, http_client, properties, ipython_display,
                 session_id=-1, sql_created=None):
        """Set up the session object without contacting the server.

        Parameters:
            http_client : client used for every request about this session.
            properties : session properties; must contain a "kind" entry.
            ipython_display : display used for user-facing messages.
            session_id : id of an existing session, or -1 for a new one.
            sql_created : whether the SQL context already exists.

        Raises ValueError when the SQL state is given without a session id, or
        when the session kind is not supported.
        """
        super(LivySession, self).__init__()
        assert "kind" in properties.keys()
        declared_kind = properties["kind"]
        self.properties = properties
        self.ipython_display = ipython_display
        self._spark_events = SparkEvents()

        # Timing settings come from configuration and must all be positive.
        poll_interval = conf.status_sleep_seconds()
        statement_interval = conf.statement_sleep_seconds()
        idle_timeout = conf.wait_for_idle_timeout_seconds()
        assert poll_interval > 0
        assert statement_interval > 0
        assert idle_timeout > 0

        unstarted = session_id == -1
        if unstarted and sql_created is True:
            raise ValueError("Cannot indicate sql state without session id.")

        self.logger = Log("LivySession")

        lowered_kind = declared_kind.lower()
        if lowered_kind not in constants.SESSION_KINDS_SUPPORTED:
            allowed = ", ".join(constants.SESSION_KINDS_SUPPORTED)
            raise ValueError("Session of kind '{}' not supported. Session must be of kinds {}."
                             .format(lowered_kind, allowed))

        if unstarted:
            # A session that has not been started cannot have a SQL context yet.
            self.status = constants.NOT_STARTED_SESSION_STATUS
            sql_created = False
        else:
            self.status = constants.BUSY_SESSION_STATUS

        self._logs = ""
        self._http_client = http_client
        self._status_sleep_seconds = poll_interval
        self._statement_sleep_seconds = statement_interval
        self._wait_for_idle_timeout_seconds = idle_timeout

        self.kind = lowered_kind
        self.id = session_id
        self.created_sql_context = sql_created

    def __str__(self):
        """Return a one-line summary with the session id, kind and status."""
        layout = "Session id: {}\tKind: {}\tState: {}"
        return layout.format(self.id, self.kind, self.status)

    def start(self, create_sql_context=True):
        """Start the session against actual livy server.

        Posts the session, waits for it to become idle and, if requested,
        creates the SQL context. Creation start and end events are emitted
        around the whole operation.
        """
        self._spark_events.emit_session_creation_start_event(self.guid, self.kind)

        created = self._http_client.post_session(self.properties)
        self.id = created["id"]
        self.status = str(created["state"])

        self.ipython_display.writeln("Creating SparkContext as 'sc'")
        # The session gets livy_session_startup_timeout_seconds() to start up.
        try:
            self.wait_for_idle(conf.livy_session_startup_timeout_seconds())
        except LivyClientTimeoutError:
            raise LivyClientTimeoutError("Session {} did not start up in {} seconds."
                                         .format(self.id, conf.livy_session_startup_timeout_seconds()))

        if create_sql_context:
            self.create_sql_context()
        self._spark_events.emit_session_creation_end_event(self.guid, self.kind, self.id, self.status)

    def create_sql_context(self):
        """Create a sqlContext object on the session, reachable as variable 'sqlContext'.

        Does nothing when the context was already created. Raises
        LivyClientTimeoutError when the creation command times out.
        """
        if self.created_sql_context:
            return

        self.logger.debug("Starting '{}' hive session.".format(self.kind))
        self.ipython_display.writeln("Creating HiveContext as 'sqlContext'")
        creation_command = self._get_sql_context_creation_command()
        try:
            creation_command.execute(self)
        except LivyClientTimeoutError:
            raise LivyClientTimeoutError("Failed to create the SqlContext in time. Timed out after {} seconds."
                                         .format(self._wait_for_idle_timeout_seconds))
        self.created_sql_context = True

    def get_logs(self):
        """Fetch all session log lines, cache them joined by newlines and return them."""
        fetched = self._http_client.get_all_session_logs(self.id)
        self._logs = "\n".join(fetched['log'])
        return self._logs

    @property
    def http_client(self):
        """The HTTP client this session talks through."""
        return self._http_client

    @staticmethod
    def is_final_status(status):
        """Tell whether ``status`` is one of constants.FINAL_STATUS."""
        return status in constants.FINAL_STATUS

    def delete(self):
        """Delete the session on the server and mark it dead.

        Raises ValueError when the session was never started or is already dead.
        """
        self.logger.debug("Deleting session '{}'".format(self.id))

        if self.status in (constants.NOT_STARTED_SESSION_STATUS, constants.DEAD_SESSION_STATUS):
            raise ValueError("Cannot delete session {} that is in state '{}'."
                             .format(self.id, self.status))

        self._http_client.delete_session(self.id)
        self.status = constants.DEAD_SESSION_STATUS
        self.id = -1

    def wait_for_idle(self, seconds_to_wait=None):
        """Block until the session is idle, polling every status_sleep_seconds.

        Parameters:
            seconds_to_wait : number of seconds to wait before giving up;
                defaults to the configured wait-for-idle timeout.

        Raises LivyUnexpectedStatusError when the session reaches a final
        status and LivyClientTimeoutError when the time budget runs out.
        """
        budget = self._wait_for_idle_timeout_seconds if seconds_to_wait is None else seconds_to_wait

        while True:
            self._refresh_status()
            if self.status == constants.IDLE_SESSION_STATUS:
                return

            if self.status in constants.FINAL_STATUS:
                failure = "Session {} unexpectedly reached final status '{}'. See logs:\n{}"\
                    .format(self.id, self.status, self.get_logs())
                self.logger.error(failure)
                raise LivyUnexpectedStatusError(failure)

            if budget <= 0.0:
                failure = "Session {} did not reach idle status in time. Current status is {}."\
                    .format(self.id, self.status)
                self.logger.error(failure)
                raise LivyClientTimeoutError(failure)

            # Charge the measured duration of the pause against the budget.
            began = time()
            self.logger.debug("Session {} in state {}. Sleeping {} seconds."
                              .format(self.id, self.status, self._status_sleep_seconds))
            sleep(self._status_sleep_seconds)
            budget = budget - (time() - began)

    def sleep(self):
        """Pause for the configured statement sleep interval."""
        sleep(self._statement_sleep_seconds)

    def _refresh_status(self):
        """Read the session state from the server, store it and return it.

        Raises ValueError for a state that is not in constants.POSSIBLE_SESSION_STATUS.
        """
        reported = self._http_client.get_session(self.id)['state']

        if reported not in constants.POSSIBLE_SESSION_STATUS:
            raise ValueError("Status '{}' not supported by session.".format(reported))

        self.status = reported
        return self.status

    def _get_sql_context_creation_command(self):
        """Build the Command that creates the HiveContext for this session's kind.

        Raises ValueError for a kind without a known command.
        """
        candidates = (
            (constants.SESSION_KIND_SPARK,
             "val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)"),
            (constants.SESSION_KIND_PYSPARK,
             "from pyspark.sql import HiveContext\nsqlContext = HiveContext(sc)"),
            (constants.SESSION_KIND_SPARKR,
             "sqlContext <- sparkRHive.init(sc)"),
        )
        for candidate_kind, statement in candidates:
            if self.kind == candidate_kind:
                return Command(statement)

        raise ValueError("Do not know how to create HiveContext in session of kind {}.".format(self.kind))
Exemplo n.º 55
0
 def __init__(self):
     self.logger = Log("EventsHandler")