Exemplo n.º 1
0
    def __init__(self, endpoint, headers, retry_policy):
        self._endpoint = endpoint
        self._headers = headers
        self._retry_policy = retry_policy

        if self._endpoint.auth == constants.AUTH_KERBEROS:
            if self._endpoint.krb_mutual_auth == constants.AUTH_KERBEROS_MUTUAL_REQ:
                mutual_auth = REQUIRED
            elif self._endpoint.krb_mutual_auth == constants.AUTH_KERBEROS_MUTUAL_OPT:
                mutual_auth = OPTIONAL
            elif self._endpoint.krb_mutual_auth == constants.AUTH_KERBEROS_MUTUAL_DIS:
                mutual_auth = DISABLED
            else:
                mutual_auth = REQUIRED
            if self._endpoint.krb_host_override == "":
                hostname_override = None
            else:
                hostname_override = self._endpoint.krb_host_override
            self._auth = HTTPKerberosAuth(mutual_authentication=mutual_auth,
                                          hostname_override=hostname_override)
        elif self._endpoint.auth == constants.AUTH_BASIC:
            self._auth = (self._endpoint.username, self._endpoint.password)
        elif self._endpoint.auth != constants.NO_AUTH:
            raise BadUserConfigurationException(u"Unsupported auth %s" %
                                                self._endpoint.auth)

        self.logger = SparkLog(u"ReliableHttpClient")

        self.verify_ssl = not conf.ignore_ssl_errors()
        if not self.verify_ssl:
            self.logger.debug(
                u"ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks."
            )
            requests.packages.urllib3.disable_warnings()
Exemplo n.º 2
0
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(u"{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()
    def __init__(self, endpoint, headers, retry_policy):
        self._endpoint = endpoint
        self._headers = headers
        self._retry_policy = retry_policy
        if self._endpoint.auth == constants.AUTH_KERBEROS:
            self._auth = HTTPKerberosAuth(**conf.kerberos_auth_configuration())
        elif self._endpoint.auth == constants.AUTH_BASIC:
            self._auth = (self._endpoint.username, self._endpoint.password)
        elif self._endpoint.auth == constants.GOOGLE_AUTH:
            credentials = (conf.google_auth_credentials())
            self._auth = credentials
            #Once we have credentials, we attach them to a transport. We use the transport to
            #make authenticated requests: how does self._session work? Do I not have to use
            #AuthorizedSession? Kerberos HTTPKerberosAuth attaches Kerberos Auth to Requests object
            #so should self._auth = credentials or authed_session?
            authed_session = AuthorizedSession(credentials)
        elif self._endpoint.auth != constants.NO_AUTH:
            raise BadUserConfigurationException(u"Unsupported auth %s" %
                                                self._endpoint.auth)
        self._session = requests.Session()

        self.logger = SparkLog(u"ReliableHttpClient")

        self.verify_ssl = not conf.ignore_ssl_errors()
        if not self.verify_ssl:
            self.logger.debug(
                u"ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks."
            )
            requests.packages.urllib3.disable_warnings()
    def __init__(self, ipython_display):
        self.logger = SparkLog(u"SessionManager")
        self.ipython_display = ipython_display

        self._sessions = dict()

        self._register_cleanup_on_exit()
Exemplo n.º 5
0
 def __init__(self, ipython_display):
     self.logger = SparkLog(u"SparkController")
     self.ipython_display = ipython_display
     self.session_manager = SessionManager(ipython_display)
     # this is to reuse the already created http clients
     # since the reliablehttpclient uses requests session
     self._http_clients = {}
Exemplo n.º 6
0
    def __init__(self,
                 http_client,
                 properties,
                 ipython_display,
                 session_id=-1,
                 spark_events=None,
                 heartbeat_timeout=0,
                 heartbeat_thread=None):
        super(LivySession, self).__init__()
        assert constants.LIVY_KIND_PARAM in list(properties.keys())
        kind = properties[constants.LIVY_KIND_PARAM]

        should_heartbeat = False
        if heartbeat_timeout > 0:
            should_heartbeat = True
            properties[
                constants.LIVY_HEARTBEAT_TIMEOUT_PARAM] = heartbeat_timeout
        elif constants.LIVY_HEARTBEAT_TIMEOUT_PARAM in list(properties.keys()):
            properties.pop(constants.LIVY_HEARTBEAT_TIMEOUT_PARAM)

        self.properties = properties
        self.ipython_display = ipython_display
        self._should_heartbeat = should_heartbeat
        self._user_passed_heartbeat_thread = heartbeat_thread

        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

        self._policy = ConfigurableRetryPolicy(
            retry_seconds_to_sleep_list=[0.2, 0.5, 0.5, 1, 1, 2],
            max_retries=5000)
        wait_for_idle_timeout_seconds = conf.wait_for_idle_timeout_seconds()

        assert wait_for_idle_timeout_seconds > 0

        self.logger = SparkLog(u"LivySession")

        kind = kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            raise BadUserDataException(
                u"Session of kind '{}' not supported. Session must be of kinds {}."
                .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

        self._app_id = None
        self._logs = u""
        self._http_client = http_client
        self._wait_for_idle_timeout_seconds = wait_for_idle_timeout_seconds
        self._printed_resource_warning = False

        self.kind = kind
        self.id = session_id
        self.session_info = u""

        self._heartbeat_thread = None
        if session_id == -1:
            self.status = constants.NOT_STARTED_SESSION_STATUS
        else:
            self.status = constants.BUSY_SESSION_STATUS
            self._start_heartbeat_thread()
Exemplo n.º 7
0
 def __init__(self, code, spark_events=None):
     super(Command, self).__init__()
     self.code = textwrap.dedent(code)
     self.logger = SparkLog(u"Command")
     if spark_events is None:
         spark_events = SparkEvents()
     self._spark_events = spark_events
Exemplo n.º 8
0
class Command(ObjectWithGuid):
    def __init__(self, code, spark_events=None):
        super(Command, self).__init__()
        self.code = textwrap.dedent(code)
        self.logger = SparkLog(u"Command")
        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

    def __eq__(self, other):
        return self.code == other.code

    def __ne__(self, other):
        return not self == other

    def execute(self, session):
        self._spark_events.emit_statement_execution_start_event(session.guid, session.kind, session.id, self.guid)
        statement_id = -1
        try:
            session.wait_for_idle()
            data = {u"code": self.code}
            response = session.http_client.post_statement(session.id, data)
            statement_id = response[u'id']
            output = self._get_statement_output(session, statement_id)
        except Exception as e:
            self._spark_events.emit_statement_execution_end_event(session.guid, session.kind, session.id,
                                                                  self.guid, statement_id, False, e.__class__.__name__,
                                                                  str(e))
            raise
        else:
            self._spark_events.emit_statement_execution_end_event(session.guid, session.kind, session.id,
                                                                  self.guid, statement_id, True, "", "")
            return output

    def _get_statement_output(self, session, statement_id):
        retries = 1
        
        while True:
            statement = session.http_client.get_statement(session.id, statement_id)
            status = statement[u"state"].lower()

            self.logger.debug(u"Status of statement {} is {}.".format(statement_id, status))

            if status not in FINAL_STATEMENT_STATUS:
                session.sleep(retries)
                retries += 1
            else:                
                statement_output = statement[u"output"]

                if statement_output is None:
                    return (True, u"")

                if statement_output[u"status"] == u"ok":
                    return (True, statement_output[u"data"][u"text/plain"])
                elif statement_output[u"status"] == u"error":
                    return (False,
                           statement_output[u"evalue"] + u"\n" + u"".join(statement_output[u"traceback"]))
                else:
                    raise LivyUnexpectedStatusException(u"Unknown output status from Livy: '{}'"
                                                        .format(statement_output[u"status"]))
Exemplo n.º 9
0
class Command(ObjectWithGuid):
    def __init__(self, code, spark_events=None):
        super(Command, self).__init__()
        self.code = textwrap.dedent(code)
        self.logger = SparkLog(u"Command")
        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

    def __eq__(self, other):
        return self.code == other.code

    def __ne__(self, other):
        return not self == other

    def execute(self, session):
        self._spark_events.emit_statement_execution_start_event(session.guid, session.kind, session.id, self.guid)
        statement_id = -1
        try:
            session.wait_for_idle()
            data = {u"code": self.code}
            response = session.http_client.post_statement(session.id, data)
            statement_id = response[u'id']
            output = self._get_statement_output(session, statement_id)
        except Exception as e:
            self._spark_events.emit_statement_execution_end_event(session.guid, session.kind, session.id,
                                                                  self.guid, statement_id, False, e.__class__.__name__,
                                                                  str(e))
            raise
        else:
            self._spark_events.emit_statement_execution_end_event(session.guid, session.kind, session.id,
                                                                  self.guid, statement_id, True, "", "")
            return output

    def _get_statement_output(self, session, statement_id):
        statement_running = True
        out = u""
        while statement_running:
            statement = session.http_client.get_statement(session.id, statement_id)
            status = statement[u"state"]

            self.logger.debug(u"Status of statement {} is {}.".format(statement_id, status))

            if status == u"running":
                session.sleep()
            else:
                statement_running = False

                statement_output = statement[u"output"]
                if statement_output[u"status"] == u"ok":
                    out = (True, statement_output[u"data"][u"text/plain"])
                elif statement_output[u"status"] == u"error":
                    out = (False,
                           statement_output[u"evalue"] + u"\n" + u"".join(statement_output[u"traceback"]))
                else:
                    raise LivyUnexpectedStatusException(u"Unknown output status from Livy: '{}'"
                                                        .format(statement_output[u"status"]))

        return out
Exemplo n.º 10
0
    def __init__(self,
                 http_client,
                 properties,
                 ipython_display,
                 session_id=-1,
                 sql_created=None,
                 spark_events=None,
                 should_heartbeat=False,
                 heartbeat_thread=None):
        super(LivySession, self).__init__()
        assert u"kind" in list(properties.keys())
        kind = properties[u"kind"]
        self.properties = properties
        self.ipython_display = ipython_display
        self._should_heartbeat = should_heartbeat
        self._user_passed_heartbeat_thread = heartbeat_thread

        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

        status_sleep_seconds = conf.status_sleep_seconds()
        statement_sleep_seconds = conf.statement_sleep_seconds()
        wait_for_idle_timeout_seconds = conf.wait_for_idle_timeout_seconds()

        assert status_sleep_seconds > 0
        assert statement_sleep_seconds > 0
        assert wait_for_idle_timeout_seconds > 0
        if session_id == -1 and sql_created is True:
            raise BadUserDataException(
                u"Cannot indicate sql state without session id.")

        self.logger = SparkLog(u"LivySession")

        kind = kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            raise BadUserDataException(
                u"Session of kind '{}' not supported. Session must be of kinds {}."
                .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

        self._app_id = None
        self._logs = u""
        self._http_client = http_client
        self._status_sleep_seconds = status_sleep_seconds
        self._statement_sleep_seconds = statement_sleep_seconds
        self._wait_for_idle_timeout_seconds = wait_for_idle_timeout_seconds

        self.kind = kind
        self.id = session_id
        self.created_sql_context = sql_created

        self._heartbeat_thread = None
        if session_id == -1:
            self.status = constants.NOT_STARTED_SESSION_STATUS
            sql_created = False
        else:
            self.status = constants.BUSY_SESSION_STATUS
            self._start_heartbeat_thread()
Exemplo n.º 11
0
    def post(self):
        self.logger = SparkLog(u"ReconnectHandler")

        spark_events = self._get_spark_events()

        try:
            data = json_decode(self.request.body)
        except ValueError as e:
            self.set_status(400)
            msg = "Invalid JSON in request body."
            self.logger.error(msg)
            self.finish(msg)
            spark_events.emit_cluster_change_event(None, 400, False, msg)
            return

        endpoint = None
        try:
            path = self._get_argument_or_raise(data, 'path')
            username = self._get_argument_or_raise(data, 'username')
            password = self._get_argument_or_raise(data, 'password')
            endpoint = self._get_argument_or_raise(data, 'endpoint')
            auth = self._get_argument_if_exists(data, 'auth')
            if auth is None:
                if username == '' and password == '':
                    auth = constants.NO_AUTH
                else:
                    auth = constants.AUTH_BASIC
        except MissingArgumentError as e:
            self.set_status(400)
            self.finish(str(e))
            self.logger.error(str(e))
            spark_events.emit_cluster_change_event(endpoint, 400, False, str(e))
            return

        kernel_name = self._get_kernel_name(data)

        # Get kernel manager, create a new kernel if none exists or restart the existing one when applicable
        kernel_manager = yield self._get_kernel_manager(path, kernel_name)

        # Execute code
        client = kernel_manager.client()
        code = '%{} -s {} -u {} -p {} -t {}'.format(KernelMagics._do_not_call_change_endpoint.__name__, endpoint, username, password, auth)
        response_id = client.execute(code, silent=False, store_history=False)
        msg = client.get_shell_msg(response_id)

        # Get execution info
        successful_message = self._msg_successful(msg)
        error = self._msg_error(msg)
        if successful_message:
            status_code = 200
        else:
            self.logger.error(u"Code to reconnect errored out: {}".format(error))
            status_code = 500

        # Post execution info
        self.set_status(status_code)
        self.finish(json.dumps(dict(success=successful_message, error=error), sort_keys=True))
        spark_events.emit_cluster_change_event(endpoint, status_code, successful_message, error)
Exemplo n.º 12
0
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name):
        (success,
         out) = self.spark_controller.run_command(Command(cell), session_name)
        if not success:
            self.ipython_display.send_error(out)
        else:
            self.ipython_display.write(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows,
                             samplefraction):
        return SparkStoreCommand(output_var, samplemethod, maxrows,
                                 samplefraction)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
Exemplo n.º 13
0
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = u"""<table>
<tr><th>ID</th><th>YARN Application ID</th><th>Kind</th><th>State</th><th>Spark UI</th><th>Driver log</th><th>Current session?</th></tr>""" + \
                u"".join([SparkMagicBase._session_row_html(session, current_session_id) for session in info_sessions]) + \
                u"</table>"
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')

    @staticmethod
    def _session_row_html(session, current_session_id):
        return u"""<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td><td>{4}</td><td>{5}</td><td>{6}</td></tr>""".format(
            session.id, session.get_app_id(), session.kind, session.status,
            SparkMagicBase._link(u'Link', session.get_spark_ui_url()),
            SparkMagicBase._link(u'Link', session.get_driver_log_url()),
            u"" if current_session_id is None
            or current_session_id != session.id else u"✔")

    @staticmethod
    def _link(text, url):
        if url is not None:
            return u"""<a target="_blank" href="{1}">{0}</a>""".format(
                text, url)
        else:
            return u""
Exemplo n.º 14
0
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = u"""<table>
<tr><th>ID</th><th>YARN Application ID</th><th>Kind</th><th>State</th><th>Spark UI</th><th>Driver log</th><th>Current session?</th></tr>""" + \
                u"".join([SparkMagicBase._session_row_html(session, current_session_id) for session in info_sessions]) + \
                u"</table>"
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')

    @staticmethod
    def _session_row_html(session, current_session_id):
        return u"""<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td><td>{4}</td><td>{5}</td><td>{6}</td></tr>""".format(
            session.id, session.get_app_id(), session.kind, session.status,
            SparkMagicBase._link(u'Link', session.get_spark_ui_url()), SparkMagicBase._link(u'Link', session.get_driver_log_url()),
            u"" if current_session_id is None or current_session_id != session.id else u"✔"
        )

    @staticmethod
    def _link(text, url):
        if url is not None:
            return u"""<a target="_blank" href="{1}">{0}</a>""".format(text, url)
        else:
            return u""
 def __init__(self, endpoint, headers, retry_policy):
     self._endpoint = endpoint
     self._headers = headers
     self._retry_policy = retry_policy
     self._auth = self._endpoint.auth
     self._session = requests.Session()
     self.logger = SparkLog(u"ReliableHttpClient")
     self.verify_ssl = not conf.ignore_ssl_errors()
     if not self.verify_ssl:
         self.logger.debug(u"ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks.")
         requests.packages.urllib3.disable_warnings()
Exemplo n.º 16
0
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()
Exemplo n.º 17
0
    def __init__(self, spark_controller, ipywidget_factory, ipython_display,
                 endpoints, refresh_method):
        # This is nested
        super(ManageEndpointWidget,
              self).__init__(spark_controller, ipywidget_factory,
                             ipython_display, True)

        self.logger = SparkLog("ManageEndpointWidget")
        self.endpoints = endpoints
        self.refresh_method = refresh_method

        self.children = self.get_existing_endpoint_widgets()

        for child in self.children:
            child.parent_widget = self
Exemplo n.º 18
0
 def __init__(self, code, spark_events=None):
     super(Command, self).__init__()
     self.code = textwrap.dedent(code)
     self.logger = SparkLog(u"Command")
     if spark_events is None:
         spark_events = SparkEvents()
     self._spark_events = spark_events
Exemplo n.º 19
0
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(u"{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()
    def __init__(self, endpoint, headers, retry_policy):
        self._endpoint = endpoint
        self._headers = headers
        self._retry_policy = retry_policy
        if self._endpoint.auth_type == constants.AUTH_KERBEROS:
            self._auth = HTTPKerberosAuth(mutual_authentication=OPTIONAL)
        elif self._endpoint.auth_type == constants.AUTH_BASIC:
            self._auth = (self._endpoint.username, self._endpoint.password)
        self.logger = SparkLog(u"ReliableHttpClient")

        self.verify_ssl = not conf.ignore_ssl_errors()
        if not self.verify_ssl:
            self.logger.debug(
                u"ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks."
            )
            requests.packages.urllib3.disable_warnings()
Exemplo n.º 21
0
    def __init__(self, http_client, properties, ipython_display,
                 session_id=-1, spark_events=None,
                 heartbeat_timeout=0, heartbeat_thread=None):
        super(LivySession, self).__init__()
        assert constants.LIVY_KIND_PARAM in list(properties.keys())
        kind = properties[constants.LIVY_KIND_PARAM]

        should_heartbeat = False
        if heartbeat_timeout > 0:
            should_heartbeat = True
            properties[constants.LIVY_HEARTBEAT_TIMEOUT_PARAM] = heartbeat_timeout
        elif constants.LIVY_HEARTBEAT_TIMEOUT_PARAM in list(properties.keys()):
            properties.pop(constants.LIVY_HEARTBEAT_TIMEOUT_PARAM)

        self.properties = properties
        self.ipython_display = ipython_display
        self._should_heartbeat = should_heartbeat
        self._user_passed_heartbeat_thread = heartbeat_thread

        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

        status_sleep_seconds = conf.status_sleep_seconds()
        statement_sleep_seconds = conf.statement_sleep_seconds()
        wait_for_idle_timeout_seconds = conf.wait_for_idle_timeout_seconds()

        assert status_sleep_seconds > 0
        assert statement_sleep_seconds > 0
        assert wait_for_idle_timeout_seconds > 0

        self.logger = SparkLog(u"LivySession")

        kind = kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            raise BadUserDataException(u"Session of kind '{}' not supported. Session must be of kinds {}."
                                       .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

        self._app_id = None
        self._logs = u""
        self._http_client = http_client
        self._status_sleep_seconds = status_sleep_seconds
        self._statement_sleep_seconds = statement_sleep_seconds
        self._wait_for_idle_timeout_seconds = wait_for_idle_timeout_seconds
        self._printed_resource_warning = False

        self.kind = kind
        self.id = session_id
        self.session_info = u""
        
        self._heartbeat_thread = None
        if session_id == -1:
            self.status = constants.NOT_STARTED_SESSION_STATUS
        else:
            self.status = constants.BUSY_SESSION_STATUS
            self._start_heartbeat_thread()
Exemplo n.º 22
0
    def __init__(self, endpoint, headers, retry_policy):
        self._endpoint = endpoint
        self._headers = headers
        self._retry_policy = retry_policy
        self.logger = SparkLog(u"ReliableHttpClient")

        self.verify_ssl = not conf.ignore_ssl_errors()
        if not self.verify_ssl:
            self.logger.debug(u"ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks.")
            requests.packages.urllib3.disable_warnings()
Exemplo n.º 23
0
    def __init__(self, endpoint, headers, retry_policy):
        self._endpoint = endpoint
        self._headers = headers
        self._retry_policy = retry_policy
        if self._endpoint.auth == constants.AUTH_KERBEROS:
            self._auth = HTTPKerberosAuth(**conf.kerberos_auth_configuration())
        elif self._endpoint.auth == constants.AUTH_BASIC:
            self._auth = (self._endpoint.username, self._endpoint.password)
        elif self._endpoint.auth != constants.NO_AUTH:
            raise BadUserConfigurationException(u"Unsupported auth %s" %
                                                self._endpoint.auth)
        self._session = requests.Session()

        self.logger = SparkLog(u"ReliableHttpClient")

        self.verify_ssl = not conf.ignore_ssl_errors()
        if not self.verify_ssl:
            self.logger.debug(
                u"ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks."
            )
            requests.packages.urllib3.disable_warnings()
Exemplo n.º 24
0
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()
Exemplo n.º 25
0
    def __init__(self, http_client, properties, ipython_display,
                 session_id=-1, sql_created=None, spark_events=None,
                 should_heartbeat=False, heartbeat_thread=None):
        super(LivySession, self).__init__()
        assert u"kind" in list(properties.keys())
        kind = properties[u"kind"]
        self.properties = properties
        self.ipython_display = ipython_display
        self._should_heartbeat = should_heartbeat
        self._user_passed_heartbeat_thread = heartbeat_thread

        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

        status_sleep_seconds = conf.status_sleep_seconds()
        statement_sleep_seconds = conf.statement_sleep_seconds()
        wait_for_idle_timeout_seconds = conf.wait_for_idle_timeout_seconds()

        assert status_sleep_seconds > 0
        assert statement_sleep_seconds > 0
        assert wait_for_idle_timeout_seconds > 0
        if session_id == -1 and sql_created is True:
            raise BadUserDataException(u"Cannot indicate sql state without session id.")

        self.logger = SparkLog(u"LivySession")

        kind = kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            raise BadUserDataException(u"Session of kind '{}' not supported. Session must be of kinds {}."
                                       .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

        self._app_id = None
        self._logs = u""
        self._http_client = http_client
        self._status_sleep_seconds = status_sleep_seconds
        self._statement_sleep_seconds = statement_sleep_seconds
        self._wait_for_idle_timeout_seconds = wait_for_idle_timeout_seconds
        
        self.kind = kind
        self.id = session_id
        self.created_sql_context = sql_created
        
        self._heartbeat_thread = None
        if session_id == -1:
            self.status = constants.NOT_STARTED_SESSION_STATUS
            sql_created = False
        else:
            self.status = constants.BUSY_SESSION_STATUS
            self._start_heartbeat_thread()
Exemplo n.º 26
0
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction):
        return SQLQuery(cell, samplemethod, maxrows, samplefraction)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
Exemplo n.º 27
0
class SparkKernelBase(IPythonKernel):
    def __init__(self, implementation, implementation_version, language, language_version, language_info,
                 session_language, user_code_parser=None, **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(u"{}_jupyter_kernel".format(self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        def f(self):
            if self._fatal_error is not None:
                return self._repeat_fatal_error()

            return self._do_execute(code, silent, store_history, user_expressions, allow_stdin)
        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        # Cleanup
        self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions, allow_stdin):
        code_to_run = self.user_code_parser.get_code_to_run(code)

        res = self._execute_cell(code_to_run, silent, store_history, user_expressions, allow_stdin)

        return res

    def _load_magics_extension(self):
        register_magics_code = "%load_ext sparkmagic.kernels"
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _change_language(self):
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(self.session_language)
        self._execute_cell(register_magics_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to change language to {}.".format(self.session_language))
        self.logger.debug("Changed language.")

    def _register_auto_viz(self):
        from sparkmagic.utils.sparkevents import get_spark_events_handler
        import autovizwidget.utils.configuration as c
        
        handler = get_spark_events_handler()
        c.override("events_handler", handler)
        
        register_auto_viz_code = """from autovizwidget.widget.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(register_auto_viz_code, True, False, shutdown_if_error=True,
                           log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False,
                      shutdown_if_error=False, log_if_error=None):
        reply_content = self._execute_cell_for_user(code, silent, store_history, user_expressions, allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        return super(SparkKernelBase, self).do_execute(code, silent, store_history, user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect. Call this and return the value it
        returns when there's some sort of error preventing the user's cell from executing; this
        will register the cell from the Jupyter UI as being completed."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()
Exemplo n.º 28
0
class ReconnectHandler(IPythonHandler):
    logger = None

    @web.authenticated
    @gen.coroutine
    def post(self):
        self.logger = SparkLog(u"ReconnectHandler")

        spark_events = self._get_spark_events()

        try:
            data = json_decode(self.request.body)
        except ValueError as e:
            self.set_status(400)
            msg = "Invalid JSON in request body."
            self.logger.error(msg)
            self.finish(msg)
            spark_events.emit_cluster_change_event(None, 400, False, msg)
            return

        endpoint = None
        try:
            path = self._get_argument_or_raise(data, 'path')
            username = self._get_argument_or_raise(data, 'username')
            password = self._get_argument_or_raise(data, 'password')
            endpoint = self._get_argument_or_raise(data, 'endpoint')
            auth = self._get_argument_if_exists(data, 'auth')
            if auth is None:
                if username == '' and password == '':
                    auth = constants.NO_AUTH
                else:
                    auth = constants.AUTH_BASIC
        except MissingArgumentError as e:
            self.set_status(400)
            self.finish(str(e))
            self.logger.error(str(e))
            spark_events.emit_cluster_change_event(endpoint, 400, False, str(e))
            return

        kernel_name = self._get_kernel_name(data)

        # Get kernel manager, create a new kernel if none exists or restart the existing one when applicable
        kernel_manager = yield self._get_kernel_manager(path, kernel_name)

        # Execute code
        client = kernel_manager.client()
        code = '%{} -s {} -u {} -p {} -t {}'.format(KernelMagics._do_not_call_change_endpoint.__name__, endpoint, username, password, auth)
        response_id = client.execute(code, silent=False, store_history=False)
        msg = client.get_shell_msg(response_id)

        # Get execution info
        successful_message = self._msg_successful(msg)
        error = self._msg_error(msg)
        if successful_message:
            status_code = 200
        else:
            self.logger.error(u"Code to reconnect errored out: {}".format(error))
            status_code = 500

        # Post execution info
        self.set_status(status_code)
        self.finish(json.dumps(dict(success=successful_message, error=error), sort_keys=True))
        spark_events.emit_cluster_change_event(endpoint, status_code, successful_message, error)

    def _get_kernel_name(self, data):
        kernel_name = self._get_argument_if_exists(data, 'kernelname')
        self.logger.debug("Kernel name is {}".format(kernel_name))
        if kernel_name is None:
            kernel_name = conf.server_extension_default_kernel_name()
            self.logger.debug("Defaulting to kernel name {}".format(kernel_name))
        return kernel_name

    def _get_argument_if_exists(self, data, key):
        return data.get(key)

    def _get_argument_or_raise(self, data, key):
        try:
            return data[key]
        except KeyError:
            raise MissingArgumentError(key)

    @gen.coroutine
    def _get_kernel_manager(self, path, kernel_name):
        sessions = self.session_manager.list_sessions()

        kernel_id = None
        for session in sessions:
            if session['notebook']['path'] == path:
                session_id = session['id']
                kernel_id = session['kernel']['id']
                existing_kernel_name = session['kernel']['name']
                break

        if kernel_id is None:
            self.logger.debug(u"Kernel not found. Starting a new kernel.")
            k_m = yield self._get_kernel_manager_new_session(path, kernel_name)
        elif existing_kernel_name != kernel_name:
            self.logger.debug(u"Existing kernel name '{}' does not match requested '{}'. Starting a new kernel.".format(existing_kernel_name, kernel_name))
            self._delete_session(session_id)
            k_m = yield self._get_kernel_manager_new_session(path, kernel_name)
        else:
            self.logger.debug(u"Kernel found. Restarting kernel.")
            k_m = self.kernel_manager.get_kernel(kernel_id)
            k_m.restart_kernel()

        raise gen.Return(k_m)

    @gen.coroutine
    def _get_kernel_manager_new_session(self, path, kernel_name):
        model_future = self.session_manager.create_session(kernel_name=kernel_name, path=path, type="notebook")
        model = yield model_future
        kernel_id = model["kernel"]["id"]
        self.logger.debug("Kernel created with id {}".format(str(kernel_id)))
        k_m = self.kernel_manager.get_kernel(kernel_id)
        raise gen.Return(k_m)

    def _delete_session(self, session_id):
        self.session_manager.delete_session(session_id)

    def _msg_status(self, msg):
        return msg['content']['status']

    def _msg_successful(self, msg):
        return self._msg_status(msg) == 'ok'

    def _msg_error(self, msg):
        if self._msg_status(msg) != 'error':
            return None
        return u'{}:\n{}'.format(msg['content']['ename'], msg['content']['evalue'])

    def _get_spark_events(self):
        spark_events = getattr(self, 'spark_events', None)
        if spark_events is None:
            return SparkEvents()
        return spark_events
Exemplo n.º 29
0
class LivySession(ObjectWithGuid):
    def __init__(self,
                 http_client,
                 properties,
                 ipython_display,
                 session_id=-1,
                 sql_created=None,
                 spark_events=None,
                 should_heartbeat=False,
                 heartbeat_thread=None):
        super(LivySession, self).__init__()
        assert u"kind" in list(properties.keys())
        kind = properties[u"kind"]
        self.properties = properties
        self.ipython_display = ipython_display
        self._should_heartbeat = should_heartbeat
        self._user_passed_heartbeat_thread = heartbeat_thread

        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

        status_sleep_seconds = conf.status_sleep_seconds()
        statement_sleep_seconds = conf.statement_sleep_seconds()
        wait_for_idle_timeout_seconds = conf.wait_for_idle_timeout_seconds()

        assert status_sleep_seconds > 0
        assert statement_sleep_seconds > 0
        assert wait_for_idle_timeout_seconds > 0
        if session_id == -1 and sql_created is True:
            raise BadUserDataException(
                u"Cannot indicate sql state without session id.")

        self.logger = SparkLog(u"LivySession")

        kind = kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            raise BadUserDataException(
                u"Session of kind '{}' not supported. Session must be of kinds {}."
                .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

        self._app_id = None
        self._logs = u""
        self._http_client = http_client
        self._status_sleep_seconds = status_sleep_seconds
        self._statement_sleep_seconds = statement_sleep_seconds
        self._wait_for_idle_timeout_seconds = wait_for_idle_timeout_seconds

        self.kind = kind
        self.id = session_id
        self.created_sql_context = sql_created

        self._heartbeat_thread = None
        if session_id == -1:
            self.status = constants.NOT_STARTED_SESSION_STATUS
            sql_created = False
        else:
            self.status = constants.BUSY_SESSION_STATUS
            self._start_heartbeat_thread()

    def __str__(self):
        return u"Session id: {}\tYARN id: {}\tKind: {}\tState: {}\n\tSpark UI: {}\n\tDriver Log: {}"\
            .format(self.id, self.get_app_id(), self.kind, self.status, self.get_spark_ui_url(), self.get_driver_log_url())

    def start(self, create_sql_context=True):
        """Start the session against actual livy server."""
        self._spark_events.emit_session_creation_start_event(
            self.guid, self.kind)

        try:
            r = self._http_client.post_session(self.properties)
            self.id = r[u"id"]
            self.status = str(r[u"state"])

            self.ipython_display.writeln(u"Creating SparkContext as 'sc'")

            # Start heartbeat thread to keep Livy interactive session alive.
            self._start_heartbeat_thread()

            # We wait for livy_session_startup_timeout_seconds() for the session to start up.
            try:
                self.wait_for_idle(conf.livy_session_startup_timeout_seconds())
            except LivyClientTimeoutException:
                raise LivyClientTimeoutException(
                    u"Session {} did not start up in {} seconds.".format(
                        self.id, conf.livy_session_startup_timeout_seconds()))

            html = get_sessions_info_html([self], self.id)
            self.ipython_display.html(html)

            if create_sql_context:
                self.create_sql_context()
        except Exception as e:
            self._spark_events.emit_session_creation_end_event(
                self.guid, self.kind, self.id, self.status, False,
                e.__class__.__name__, str(e))
            raise
        else:
            self._spark_events.emit_session_creation_end_event(
                self.guid, self.kind, self.id, self.status, True, "", "")

    def create_sql_context(self):
        """Create a sqlContext object on the session. Object will be accessible via variable 'sqlContext'."""
        if self.created_sql_context:
            return
        self.logger.debug(u"Starting '{}' hive session.".format(self.kind))
        self.ipython_display.writeln(u"Creating HiveContext as 'sqlContext'")
        command = self._get_sql_context_creation_command()
        try:
            (success, out) = command.execute(self)
        except LivyClientTimeoutException:
            raise LivyClientTimeoutException(
                u"Failed to create the SqlContext in time. Timed out after {} seconds."
                .format(self._wait_for_idle_timeout_seconds))
        if success:
            self.ipython_display.writeln(
                u"SparkContext and HiveContext created. Executing user code ..."
            )
            self.created_sql_context = True
        else:
            raise FailedToCreateSqlContextException(
                u"Failed to create the SqlContext.\nError: '{}'".format(out))

    def get_app_id(self):
        if self._app_id is None:
            self._app_id = self._http_client.get_session(self.id).get("appId")
        return self._app_id

    def get_app_info(self):
        appInfo = self._http_client.get_session(self.id).get("appInfo")
        return appInfo if appInfo is not None else {}

    def get_app_info_member(self, member_name):
        return self.get_app_info().get(member_name)

    def get_driver_log_url(self):
        return self.get_app_info_member("driverLogUrl")

    def get_logs(self):
        log_array = self._http_client.get_all_session_logs(self.id)[u'log']
        self._logs = "\n".join(log_array)
        return self._logs

    def get_spark_ui_url(self):
        return self.get_app_info_member("sparkUiUrl")

    @property
    def http_client(self):
        return self._http_client

    @staticmethod
    def is_final_status(status):
        return status in constants.FINAL_STATUS

    def delete(self):
        session_id = self.id
        self._spark_events.emit_session_deletion_start_event(
            self.guid, self.kind, session_id, self.status)

        try:
            self.logger.debug(u"Deleting session '{}'".format(session_id))

            if self.status != constants.NOT_STARTED_SESSION_STATUS:
                self._http_client.delete_session(session_id)
                self._stop_heartbeat_thread()
                self.status = constants.DEAD_SESSION_STATUS
                self.id = -1
            else:
                self.ipython_display.send_error(
                    u"Cannot delete session {} that is in state '{}'.".format(
                        session_id, self.status))

        except Exception as e:
            self._spark_events.emit_session_deletion_end_event(
                self.guid, self.kind, session_id, self.status, False,
                e.__class__.__name__, str(e))
            raise
        else:
            self._spark_events.emit_session_deletion_end_event(
                self.guid, self.kind, session_id, self.status, True, "", "")

    def wait_for_idle(self, seconds_to_wait=None):
        """Wait for session to go to idle status. Sleep meanwhile. Calls done every status_sleep_seconds as
        indicated by the constructor.

        Parameters:
            seconds_to_wait : number of seconds to wait before giving up.
        """
        if seconds_to_wait is None:
            seconds_to_wait = self._wait_for_idle_timeout_seconds

        while True:
            self.refresh_status()
            if self.status == constants.IDLE_SESSION_STATUS:
                return

            if self.status in constants.FINAL_STATUS:
                error = u"Session {} unexpectedly reached final status '{}'."\
                    .format(self.id, self.status)
                self.logger.error(error)
                raise LivyUnexpectedStatusException(u'{} See logs:\n{}'.format(
                    error, self.get_logs()))

            if seconds_to_wait <= 0.0:
                error = u"Session {} did not reach idle status in time. Current status is {}."\
                    .format(self.id, self.status)
                self.logger.error(error)
                raise LivyClientTimeoutException(error)

            start_time = time()
            self.logger.debug(
                u"Session {} in state {}. Sleeping {} seconds.".format(
                    self.id, self.status, self._status_sleep_seconds))
            sleep(self._status_sleep_seconds)
            seconds_to_wait -= time() - start_time

    def sleep(self):
        sleep(self._statement_sleep_seconds)

    def refresh_status(self):
        status = self._http_client.get_session(self.id)[u'state']

        if status in constants.POSSIBLE_SESSION_STATUS:
            self.status = status
        else:
            raise LivyUnexpectedStatusException(
                u"Status '{}' not supported by session.".format(status))

        return self.status

    def _get_sql_context_creation_command(self):
        if self.kind == constants.SESSION_KIND_SPARK:
            sql_context_command = u"val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)"
        elif self.kind == constants.SESSION_KIND_PYSPARK:
            sql_context_command = u"from pyspark.sql import HiveContext\nsqlContext = HiveContext(sc)"
        elif self.kind == constants.SESSION_KIND_SPARKR:
            sql_context_command = u"sqlContext <- sparkRHive.init(sc)"
        else:
            raise BadUserDataException(
                u"Do not know how to create HiveContext in session of kind {}."
                .format(self.kind))

        return Command(sql_context_command)

    def _start_heartbeat_thread(self):
        if self._should_heartbeat and self._heartbeat_thread is None:
            refresh_seconds = conf.heartbeat_refresh_seconds()
            retry_seconds = conf.heartbeat_retry_seconds()

            if self._user_passed_heartbeat_thread is None:
                self._heartbeat_thread = _HeartbeatThread(
                    self, refresh_seconds, retry_seconds)
            else:
                self._heartbeat_thread = self._user_passed_heartbeat_thread

            self._heartbeat_thread.daemon = True
            self._heartbeat_thread.start()

    def _stop_heartbeat_thread(self):
        if self._heartbeat_thread is not None:
            self._heartbeat_thread.stop()
            self._heartbeat_thread = None

    def get_row_html(self, current_session_id):
        return u"""<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td><td>{4}</td><td>{5}</td><td>{6}</td></tr>""".format(
            self.id, self.get_app_id(), self.kind, self.status,
            self.get_html_link(u'Link', self.get_spark_ui_url()),
            self.get_html_link(u'Link', self.get_driver_log_url()),
            u"" if current_session_id is None or current_session_id != self.id
            else u"\u2714")

    @staticmethod
    def get_html_link(text, url):
        if url is not None:
            return u"""<a target="_blank" href="{1}">{0}</a>""".format(
                text, url)
        else:
            return u""
Exemplo n.º 30
0
class Command(ObjectWithGuid):
    def __init__(self, code, spark_events=None):
        super(Command, self).__init__()
        self.code = textwrap.dedent(code)
        self.logger = SparkLog(u"Command")
        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

    def __repr__(self):
        return "Command({}, ...)".format(repr(self.code))

    def __eq__(self, other):
        return self.code == other.code

    def __ne__(self, other):
        return not self == other

    def execute(self, session):
        self._spark_events.emit_statement_execution_start_event(
            session.guid, session.kind, session.id, self.guid)
        statement_id = -1
        try:
            session.wait_for_idle()
            data = {u"code": self.code}
            response = session.http_client.post_statement(session.id, data)
            statement_id = response[u'id']
            output = self._get_statement_output(session, statement_id)
        except Exception as e:
            self._spark_events.emit_statement_execution_end_event(
                session.guid, session.kind, session.id, self.guid,
                statement_id, False, e.__class__.__name__, str(e))
            raise
        else:
            self._spark_events.emit_statement_execution_end_event(
                session.guid, session.kind, session.id, self.guid,
                statement_id, True, "", "")
            return output

    def _get_statement_output(self, session, statement_id):
        retries = 1
        progress = FloatProgress(value=0.0,
                                 min=0,
                                 max=1.0,
                                 step=0.01,
                                 description='Progress:',
                                 bar_style='info',
                                 orientation='horizontal',
                                 layout=Layout(width='50%', height='25px'))
        session.ipython_display.display(progress)

        while True:
            statement = session.http_client.get_statement(
                session.id, statement_id)
            status = statement[u"state"].lower()

            self.logger.debug(u"Status of statement {} is {}.".format(
                statement_id, status))

            if status not in FINAL_STATEMENT_STATUS:
                progress.value = statement.get('progress', 0.0)
                session.sleep(retries)
                retries += 1
            else:
                statement_output = statement[u"output"]
                progress.close()

                if statement_output is None:
                    return (True, u"")

                if statement_output[u"status"] == u"ok":
                    data = statement_output[u"data"]
                    png_encoded = data.get("image/png")
                    if png_encoded:
                        image = Image(base64.b64decode(png_encoded))
                        return (True, image)
                    else:
                        return (True, statement_output[u"data"][u"text/plain"])
                elif statement_output[u"status"] == u"error":
                    return (False, statement_output[u"evalue"] + u"\n" +
                            u"".join(statement_output[u"traceback"]))
                else:
                    raise LivyUnexpectedStatusException(
                        u"Unknown output status from Livy: '{}'".format(
                            statement_output[u"status"]))
Exemplo n.º 31
0
class SparkKernelBase(IPythonKernel):
    def __init__(self,
                 implementation,
                 implementation_version,
                 language,
                 language_version,
                 language_info,
                 session_language,
                 user_code_parser=None,
                 **kwargs):
        # Required by Jupyter - Override
        self.implementation = implementation
        self.implementation_version = implementation_version
        self.language = language
        self.language_version = language_version
        self.language_info = language_info

        # Override
        self.session_language = session_language

        super(SparkKernelBase, self).__init__(**kwargs)

        self.logger = SparkLog(u"{}_jupyter_kernel".format(
            self.session_language))
        self._fatal_error = None
        self.ipython_display = IpythonDisplay()

        if user_code_parser is None:
            self.user_code_parser = UserCodeParser()
        else:
            self.user_code_parser = user_code_parser

        # Disable warnings for test env in HDI
        requests.packages.urllib3.disable_warnings()

        if not kwargs.get("testing", False):
            self._load_magics_extension()
            self._change_language()
            if conf.use_auto_viz():
                self._register_auto_viz()

    def do_execute(self,
                   code,
                   silent,
                   store_history=True,
                   user_expressions=None,
                   allow_stdin=False):
        def f(self):
            if self._fatal_error is not None:
                return self._repeat_fatal_error()

            return self._do_execute(code, silent, store_history,
                                    user_expressions, allow_stdin)

        return wrap_unexpected_exceptions(f, self._complete_cell)(self)

    def do_shutdown(self, restart):
        # Cleanup
        # self._delete_session()

        return self._do_shutdown_ipykernel(restart)

    def _do_execute(self, code, silent, store_history, user_expressions,
                    allow_stdin):
        code_to_run = self.user_code_parser.get_code_to_run(code)

        res = self._execute_cell(code_to_run, silent, store_history,
                                 user_expressions, allow_stdin)

        return res

    def _load_magics_extension(self):
        register_magics_code = "%load_ext sparkmagic.kernels"
        self._execute_cell(
            register_magics_code,
            True,
            False,
            shutdown_if_error=True,
            log_if_error="Failed to load the Spark kernels magics library.")
        self.logger.debug("Loaded magics.")

    def _change_language(self):
        register_magics_code = "%%_do_not_call_change_language -l {}\n ".format(
            self.session_language)
        self._execute_cell(
            register_magics_code,
            True,
            False,
            shutdown_if_error=True,
            log_if_error="Failed to change language to {}.".format(
                self.session_language))
        self.logger.debug("Changed language.")

    def _register_auto_viz(self):
        from sparkmagic.utils.sparkevents import get_spark_events_handler
        import autovizwidget.utils.configuration as c

        handler = get_spark_events_handler()
        c.override("events_handler", handler)

        register_auto_viz_code = """from autovizwidget.widget.utils import display_dataframe
ip = get_ipython()
ip.display_formatter.ipython_display_formatter.for_type_by_name('pandas.core.frame', 'DataFrame', display_dataframe)"""
        self._execute_cell(
            register_auto_viz_code,
            True,
            False,
            shutdown_if_error=True,
            log_if_error="Failed to register auto viz for notebook.")
        self.logger.debug("Registered auto viz.")

    def _delete_session(self):
        code = "%%_do_not_call_delete_session\n "
        self._execute_cell_for_user(code, True, False)

    def _execute_cell(self,
                      code,
                      silent,
                      store_history=True,
                      user_expressions=None,
                      allow_stdin=False,
                      shutdown_if_error=False,
                      log_if_error=None):
        reply_content = self._execute_cell_for_user(code, silent,
                                                    store_history,
                                                    user_expressions,
                                                    allow_stdin)

        if shutdown_if_error and reply_content[u"status"] == u"error":
            error_from_reply = reply_content[u"evalue"]
            if log_if_error is not None:
                message = "{}\nException details:\n\t\"{}\"".format(
                    log_if_error, error_from_reply)
                return self._abort_with_fatal_error(message)

        return reply_content

    def _execute_cell_for_user(self,
                               code,
                               silent,
                               store_history=True,
                               user_expressions=None,
                               allow_stdin=False):
        return super(SparkKernelBase,
                     self).do_execute(code, silent, store_history,
                                      user_expressions, allow_stdin)

    def _do_shutdown_ipykernel(self, restart):
        return super(SparkKernelBase, self).do_shutdown(restart)

    def _complete_cell(self):
        """A method that runs a cell with no effect. Call this and return the value it
        returns when there's some sort of error preventing the user's cell from executing; this
        will register the cell from the Jupyter UI as being completed."""
        return self._execute_cell("None", False, True, None, False)

    def _show_user_error(self, message):
        self.logger.error(message)
        self.ipython_display.send_error(message)

    def _queue_fatal_error(self, message):
        """Queues up a fatal error to be thrown when the next cell is executed; does not
        raise an error immediately. We use this for errors that happen on kernel startup,
        since IPython crashes if we throw an exception in the __init__ method."""
        self._fatal_error = message

    def _abort_with_fatal_error(self, message):
        """Queues up a fatal error and throws it immediately."""
        self._queue_fatal_error(message)
        return self._repeat_fatal_error()

    def _repeat_fatal_error(self):
        """Throws an error that has already been queued."""
        error = conf.fatal_error_suggestion().format(self._fatal_error)
        self.logger.error(error)
        self.ipython_display.send_error(error)
        return self._complete_cell()
class ReliableHttpClient(object):
    """Http client that is reliable in its requests. Uses requests library."""
    def __init__(self, endpoint, headers, retry_policy):
        self._endpoint = endpoint
        self._headers = headers
        self._retry_policy = retry_policy
        if self._endpoint.auth == constants.AUTH_KERBEROS:
            self._auth = HTTPKerberosAuth(mutual_authentication=REQUIRED)
        elif self._endpoint.auth == constants.AUTH_BASIC:
            self._auth = (self._endpoint.username, self._endpoint.password)
        elif self._endpoint.auth != constants.NO_AUTH:
            raise BadUserConfigurationException(u"Unsupported auth %s" %
                                                self._endpoint.auth)

        self.logger = SparkLog(u"ReliableHttpClient")

        self.verify_ssl = not conf.ignore_ssl_errors()
        if not self.verify_ssl:
            self.logger.debug(
                u"ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks."
            )
            requests.packages.urllib3.disable_warnings()

    def get_headers(self):
        return self._headers

    def compose_url(self, relative_url):
        r_u = "/{}".format(relative_url.rstrip(u"/").lstrip(u"/"))
        return self._endpoint.url + r_u

    def get(self, relative_url, accepted_status_codes):
        """Sends a get request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes,
                                  requests.get)

    def post(self, relative_url, accepted_status_codes, data):
        """Sends a post request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes,
                                  requests.post, data)

    def delete(self, relative_url, accepted_status_codes):
        """Sends a delete request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes,
                                  requests.delete)

    def _send_request(self,
                      relative_url,
                      accepted_status_codes,
                      function,
                      data=None):
        return self._send_request_helper(self.compose_url(relative_url),
                                         accepted_status_codes, function, data,
                                         0)

    def _send_request_helper(self, url, accepted_status_codes, function, data,
                             retry_count):
        while True:
            try:
                if self._endpoint.auth == constants.NO_AUTH:
                    if data is None:
                        r = function(url,
                                     headers=self._headers,
                                     verify=self.verify_ssl)
                    else:
                        r = function(url,
                                     headers=self._headers,
                                     data=json.dumps(data),
                                     verify=self.verify_ssl)
                else:
                    if data is None:
                        r = function(url,
                                     headers=self._headers,
                                     auth=self._auth,
                                     verify=self.verify_ssl)
                    else:
                        r = function(url,
                                     headers=self._headers,
                                     auth=self._auth,
                                     data=json.dumps(data),
                                     verify=self.verify_ssl)
            except requests.exceptions.RequestException as e:
                error = True
                r = None
                status = None
                text = None

                self.logger.error(u"Request to '{}' failed with '{}'".format(
                    url, e))
            else:
                error = False
                status = r.status_code
                text = r.text

            if error or status not in accepted_status_codes:
                if self._retry_policy.should_retry(status, error, retry_count):
                    sleep(self._retry_policy.seconds_to_sleep(retry_count))
                    retry_count += 1
                    continue

                if error:
                    raise HttpClientException(
                        u"Error sending http request and maximum retry encountered."
                    )
                else:
                    raise HttpClientException(
                        u"Invalid status code '{}' from {} with error payload: {}"
                        .format(status, url, text))
            return r
Exemplo n.º 33
0
class SparkController(object):
    def __init__(self, ipython_display):
        self.logger = SparkLog(u"SparkController")
        self.ipython_display = ipython_display
        self.session_manager = SessionManager()

    def get_app_id(self, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_app_id()

    def get_driver_log_url(self, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_driver_log_url()

    def get_logs(self, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_logs()

    def get_spark_ui_url(self, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_spark_ui_url()

    def run_command(self, command, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return command.execute(session_to_use)

    def run_sqlquery(self, sqlquery, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return sqlquery.execute(session_to_use)

    def get_all_sessions_endpoint(self, endpoint):
        http_client = self._http_client(endpoint)
        sessions = http_client.get_sessions()[u"sessions"]
        session_list = [self._livy_session(http_client, {u"kind": s[u"kind"]},
                                           self.ipython_display, s[u"id"])
                        for s in sessions]
        for s in session_list:
            s.refresh_status()
        return session_list

    def get_all_sessions_endpoint_info(self, endpoint):
        sessions = self.get_all_sessions_endpoint(endpoint)
        return [str(s) for s in sessions]

    def cleanup(self):
        self.session_manager.clean_up_all()

    def cleanup_endpoint(self, endpoint):
        for session in self.get_all_sessions_endpoint(endpoint):
            session.delete()

    def delete_session_by_name(self, name):
        self.session_manager.delete_client(name)

    def delete_session_by_id(self, endpoint, session_id):
        http_client = self._http_client(endpoint)
        response = http_client.get_session(session_id)
        http_client = self._http_client(endpoint)
        session = self._livy_session(http_client, {u"kind": response[u"kind"]},
                                     self.ipython_display, session_id, False)
        session.delete()

    def add_session(self, name, endpoint, skip_if_exists, properties):
        if skip_if_exists and (name in self.session_manager.get_sessions_list()):
            self.logger.debug(u"Skipping {} because it already exists in list of sessions.".format(name))
            return
        http_client = self._http_client(endpoint)
        session = self._livy_session(http_client, properties, self.ipython_display)
        self.session_manager.add_session(name, session)
        session.start()

    def get_session_id_for_client(self, name):
        return self.session_manager.get_session_id_for_client(name)

    def get_client_keys(self):
        return self.session_manager.get_sessions_list()

    def get_manager_sessions_str(self):
        return self.session_manager.get_sessions_info()

    def get_session_by_name_or_default(self, client_name):
        if client_name is None:
            return self.session_manager.get_any_session()
        else:
            client_name = client_name.lower()
            return self.session_manager.get_session(client_name)

    def get_managed_clients(self):
        return self.session_manager.sessions

    @staticmethod
    def _livy_session(http_client, properties, ipython_display,
                      session_id=-1, sql_created=None):
        return LivySession(http_client, properties, ipython_display,
                           session_id, sql_created)

    @staticmethod
    def _http_client(endpoint):
        return LivyReliableHttpClient.from_endpoint(endpoint)
Exemplo n.º 34
0
class SparkMagicBase(Magics):

    _STRING_VAR_TYPE = 'str'
    _PANDAS_DATAFRAME_VAR_TYPE = 'df'
    _ALLOWED_LOCAL_TO_SPARK_TYPES = [
        _STRING_VAR_TYPE, _PANDAS_DATAFRAME_VAR_TYPE
    ]

    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug(u'Initialized spark magics.')

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def do_send_to_spark(self, cell, input_variable_name, var_type,
                         output_variable_name, max_rows, session_name):
        try:
            input_variable_value = self.shell.user_ns[input_variable_name]
        except KeyError:
            raise BadUserDataException(
                u'Variable named {} not found.'.format(input_variable_name))
        if input_variable_value is None:
            raise BadUserDataException(
                u'Value of {} is None!'.format(input_variable_name))

        if not output_variable_name:
            output_variable_name = input_variable_name

        if not max_rows:
            max_rows = conf.default_maxrows()

        input_variable_type = var_type.lower()
        if input_variable_type == self._STRING_VAR_TYPE:
            command = SendStringToSparkCommand(input_variable_name,
                                               input_variable_value,
                                               output_variable_name)
        elif input_variable_type == self._PANDAS_DATAFRAME_VAR_TYPE:
            command = SendPandasDfToSparkCommand(input_variable_name,
                                                 input_variable_value,
                                                 output_variable_name,
                                                 max_rows)
        else:
            raise BadUserDataException(
                u'Invalid or incorrect -t type. Available are: [{}]'.format(
                    u','.join(self._ALLOWED_LOCAL_TO_SPARK_TYPES)))

        (success, result,
         mime_type) = self.spark_controller.run_command(command, None)
        if not success:
            self.ipython_display.send_error(result)
        else:
            self.ipython_display.write(
                u'Successfully passed \'{}\' as \'{}\' to Spark'
                u' kernel'.format(input_variable_name, output_variable_name))

    def execute_spark(self,
                      cell,
                      output_var,
                      samplemethod,
                      maxrows,
                      samplefraction,
                      session_name,
                      coerce,
                      output_handler=None):
        output_handler = output_handler or SparkOutputHandler(
            html=self.ipython_display.html,
            text=self.ipython_display.write,
            default=self.ipython_display.display)

        (success, out,
         mimetype) = self.spark_controller.run_command(Command(cell),
                                                       session_name)
        if not success:
            if conf.shutdown_session_on_spark_statement_errors():
                self.spark_controller.cleanup()

            raise SparkStatementException(out)
        else:
            if isinstance(out, string_types):
                if mimetype == MIMETYPE_TEXT_HTML:
                    output_handler.html(out)
                else:
                    output_handler.text(out)
            else:
                output_handler.default(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                             coerce):
        return SparkStoreCommand(output_var,
                                 samplemethod,
                                 maxrows,
                                 samplefraction,
                                 coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                                  coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        return SQLQuery(cell,
                        samplemethod,
                        maxrows,
                        samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')
Exemplo n.º 35
0
class SparkController(object):
    def __init__(self, ipython_display):
        self.logger = SparkLog(u"SparkController")
        self.ipython_display = ipython_display
        self.session_manager = SessionManager()

    def get_app_id(self, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_app_id()

    def get_driver_log_url(self, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_driver_log_url()

    def get_logs(self, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_logs()

    def get_spark_ui_url(self, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_spark_ui_url()

    def run_command(self, command, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return command.execute(session_to_use)

    def run_sqlquery(self, sqlquery, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return sqlquery.execute(session_to_use)

    def get_all_sessions_endpoint(self, endpoint):
        http_client = self._http_client(endpoint)
        sessions = http_client.get_sessions()[u"sessions"]
        session_list = [
            self._livy_session(http_client, {u"kind": s[u"kind"]},
                               self.ipython_display, s[u"id"])
            for s in sessions
        ]
        for s in session_list:
            s.refresh_status()
        return session_list

    def get_all_sessions_endpoint_info(self, endpoint):
        sessions = self.get_all_sessions_endpoint(endpoint)
        return [str(s) for s in sessions]

    def cleanup(self):
        self.session_manager.clean_up_all()

    def cleanup_endpoint(self, endpoint):
        for session in self.get_all_sessions_endpoint(endpoint):
            session.delete()

    def delete_session_by_name(self, name):
        self.session_manager.delete_client(name)

    def delete_session_by_id(self, endpoint, session_id):
        http_client = self._http_client(endpoint)
        response = http_client.get_session(session_id)
        http_client = self._http_client(endpoint)
        session = self._livy_session(http_client, {u"kind": response[u"kind"]},
                                     self.ipython_display, session_id, False)
        session.delete()

    def add_session(self, name, endpoint, skip_if_exists, properties):
        if skip_if_exists and (name
                               in self.session_manager.get_sessions_list()):
            self.logger.debug(
                u"Skipping {} because it already exists in list of sessions.".
                format(name))
            return
        http_client = self._http_client(endpoint)
        session = self._livy_session(http_client, properties,
                                     self.ipython_display)
        self.session_manager.add_session(name, session)
        session.start()

    def get_session_id_for_client(self, name):
        return self.session_manager.get_session_id_for_client(name)

    def get_client_keys(self):
        return self.session_manager.get_sessions_list()

    def get_manager_sessions_str(self):
        return self.session_manager.get_sessions_info()

    def get_session_by_name_or_default(self, client_name):
        if client_name is None:
            return self.session_manager.get_any_session()
        else:
            client_name = client_name.lower()
            return self.session_manager.get_session(client_name)

    def get_managed_clients(self):
        return self.session_manager.sessions

    @staticmethod
    def _livy_session(http_client,
                      properties,
                      ipython_display,
                      session_id=-1,
                      sql_created=None):
        return LivySession(http_client, properties, ipython_display,
                           session_id, sql_created)

    @staticmethod
    def _http_client(endpoint):
        return LivyReliableHttpClient.from_endpoint(endpoint)
Exemplo n.º 36
0
class SparkController(object):

    def __init__(self, ipython_display):
        self.logger = SparkLog(u"SparkController")
        self.ipython_display = ipython_display
        self.session_manager = SessionManager(ipython_display)
        # this is to reuse the already created http clients
        # since the reliablehttpclient uses requests session
        self._http_clients = {}

    def get_app_id(self, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_app_id()

    def get_driver_log_url(self, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_driver_log_url()

    def get_logs(self, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_logs()

    def get_spark_ui_url(self, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return session_to_use.get_spark_ui_url()

    def run_command(self, command, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return command.execute(session_to_use)

    def run_sqlquery(self, sqlquery, client_name=None):
        session_to_use = self.get_session_by_name_or_default(client_name)
        return sqlquery.execute(session_to_use)

    def get_all_sessions_endpoint(self, endpoint):
        http_client = self._http_client(endpoint)
        sessions = http_client.get_sessions()[u"sessions"]
        session_list = [self._livy_session(http_client, {constants.LIVY_KIND_PARAM: s[constants.LIVY_KIND_PARAM]},
                                           self.ipython_display, s[u"id"])
                        for s in sessions]
        for s in session_list:
            s.refresh_status_and_info()
        return session_list

    def get_all_sessions_endpoint_info(self, endpoint):
        sessions = self.get_all_sessions_endpoint(endpoint)
        return [str(s) for s in sessions]

    def cleanup(self):
        self.session_manager.clean_up_all()

    def cleanup_endpoint(self, endpoint):
        for session in self.get_all_sessions_endpoint(endpoint):
            session.delete()

    def delete_session_by_name(self, name):
        self.session_manager.delete_client(name)

    def delete_session_by_id(self, endpoint, session_id):
        name = self.session_manager.get_session_name_by_id_endpoint(session_id, endpoint)

        if name in self.session_manager.get_sessions_list():
            self.delete_session_by_name(name)
        else:
            http_client = self._http_client(endpoint)
            response = http_client.get_session(session_id)
            http_client = self._http_client(endpoint)
            session = self._livy_session(http_client, {constants.LIVY_KIND_PARAM: response[constants.LIVY_KIND_PARAM]},
                                        self.ipython_display, session_id)
            session.delete()

    def add_session(self, name, endpoint, skip_if_exists, properties):
        if skip_if_exists and (name in self.session_manager.get_sessions_list()):
            self.logger.debug(u"Skipping {} because it already exists in list of sessions.".format(name))
            return
        http_client = self._http_client(endpoint)
        session = self._livy_session(http_client, properties, self.ipython_display)
        self.session_manager.add_session(name, session)
        session.start()

    def get_session_id_for_client(self, name):
        return self.session_manager.get_session_id_for_client(name)

    def get_client_keys(self):
        return self.session_manager.get_sessions_list()

    def get_manager_sessions_str(self):
        return self.session_manager.get_sessions_info()

    def get_session_by_name_or_default(self, client_name):
        if client_name is None:
            return self.session_manager.get_any_session()
        else:
            return self.session_manager.get_session(client_name)

    def get_managed_clients(self):
        return self.session_manager.sessions

    @staticmethod
    def _livy_session(http_client, properties, ipython_display,
                      session_id=-1):
        return LivySession(http_client, properties, ipython_display,
                           session_id, heartbeat_timeout=conf.livy_server_heartbeat_timeout_seconds())

    def _http_client(self, endpoint):
        if endpoint not in self._http_clients:
            self._http_clients[endpoint] = LivyReliableHttpClient.from_endpoint(endpoint)
        return self._http_clients[endpoint]
Exemplo n.º 37
0
class LivySession(ObjectWithGuid):
    def __init__(self, http_client, properties, ipython_display,
                 session_id=-1, spark_events=None,
                 heartbeat_timeout=0, heartbeat_thread=None):
        super(LivySession, self).__init__()
        assert constants.LIVY_KIND_PARAM in list(properties.keys())
        kind = properties[constants.LIVY_KIND_PARAM]

        should_heartbeat = False
        if heartbeat_timeout > 0:
            should_heartbeat = True
            properties[constants.LIVY_HEARTBEAT_TIMEOUT_PARAM] = heartbeat_timeout
        elif constants.LIVY_HEARTBEAT_TIMEOUT_PARAM in list(properties.keys()):
            properties.pop(constants.LIVY_HEARTBEAT_TIMEOUT_PARAM)

        self.properties = properties
        self.ipython_display = ipython_display
        self._should_heartbeat = should_heartbeat
        self._user_passed_heartbeat_thread = heartbeat_thread

        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

        status_sleep_seconds = conf.status_sleep_seconds()
        statement_sleep_seconds = conf.statement_sleep_seconds()
        wait_for_idle_timeout_seconds = conf.wait_for_idle_timeout_seconds()

        assert status_sleep_seconds > 0
        assert statement_sleep_seconds > 0
        assert wait_for_idle_timeout_seconds > 0

        self.logger = SparkLog(u"LivySession")

        kind = kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            raise BadUserDataException(u"Session of kind '{}' not supported. Session must be of kinds {}."
                                       .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

        self._app_id = None
        self._logs = u""
        self._http_client = http_client
        self._status_sleep_seconds = status_sleep_seconds
        self._statement_sleep_seconds = statement_sleep_seconds
        self._wait_for_idle_timeout_seconds = wait_for_idle_timeout_seconds
        self._printed_resource_warning = False

        self.kind = kind
        self.id = session_id
        self.session_info = u""
        
        self._heartbeat_thread = None
        if session_id == -1:
            self.status = constants.NOT_STARTED_SESSION_STATUS
        else:
            self.status = constants.BUSY_SESSION_STATUS
            self._start_heartbeat_thread()

    def __str__(self):
        return u"Session id: {}\tYARN id: {}\tKind: {}\tState: {}\n\tSpark UI: {}\n\tDriver Log: {}"\
            .format(self.id, self.get_app_id(), self.kind, self.status, self.get_spark_ui_url(), self.get_driver_log_url())

    def start(self):
        """Start the session against actual livy server."""
        self._spark_events.emit_session_creation_start_event(self.guid, self.kind)
        self._printed_resource_warning = False

        try:
            r = self._http_client.post_session(self.properties)
            self.id = r[u"id"]
            self.status = str(r[u"state"])

            self.ipython_display.writeln(u"Starting Spark application")
            
            # Start heartbeat thread to keep Livy interactive session alive.
            self._start_heartbeat_thread()
            
            # We wait for livy_session_startup_timeout_seconds() for the session to start up.
            try:
                self.wait_for_idle(conf.livy_session_startup_timeout_seconds())
            except LivyClientTimeoutException:
                raise LivyClientTimeoutException(u"Session {} did not start up in {} seconds."
                                                 .format(self.id, conf.livy_session_startup_timeout_seconds()))

            html = get_sessions_info_html([self], self.id)
            self.ipython_display.html(html)

            command = Command("spark")
            (success, out) = command.execute(self)

            if success:
                self.ipython_display.writeln(u"SparkSession available as 'spark'.")
                self.sql_context_variable_name = "spark"
            else:
                command = Command("sqlContext")
                (success, out) = command.execute(self)
                if success:
                    self.ipython_display.writeln(u"SparkContext available as 'sc'.")
                    if ("hive" in out.lower()):
                        self.ipython_display.writeln(u"HiveContext available as 'sqlContext'.")
                    else:
                        self.ipython_display.writeln(u"SqlContext available as 'sqlContext'.")
                    self.sql_context_variable_name = "sqlContext"
                else:
                    raise SqlContextNotFoundException(u"Neither SparkSession nor HiveContext/SqlContext is available.")
        except Exception as e:
            self._spark_events.emit_session_creation_end_event(self.guid, self.kind, self.id, self.status,
                                                               False, e.__class__.__name__, str(e))
            raise
        else:
            self._spark_events.emit_session_creation_end_event(self.guid, self.kind, self.id, self.status, True, "", "")

    def get_app_id(self):
        if self._app_id is None:
            self._app_id = self._http_client.get_session(self.id).get("appId")
        return self._app_id

    def get_app_info(self):
        appInfo = self._http_client.get_session(self.id).get("appInfo")
        return appInfo if appInfo is not None else {}

    def get_app_info_member(self, member_name):
        return self.get_app_info().get(member_name)

    def get_driver_log_url(self):
        return self.get_app_info_member("driverLogUrl")

    def get_logs(self):
        log_array = self._http_client.get_all_session_logs(self.id)[u'log']
        self._logs = "\n".join(log_array)
        return self._logs

    def get_spark_ui_url(self):
        return self.get_app_info_member("sparkUiUrl")

    @property
    def http_client(self):
        return self._http_client

    @staticmethod
    def is_final_status(status):
        return status in constants.FINAL_STATUS

    def delete(self):
        session_id = self.id
        self._spark_events.emit_session_deletion_start_event(self.guid, self.kind, session_id, self.status)

        try:
            self.logger.debug(u"Deleting session '{}'".format(session_id))
            
            if self.status != constants.NOT_STARTED_SESSION_STATUS:
                self._http_client.delete_session(session_id)
                self._stop_heartbeat_thread()
                self.status = constants.DEAD_SESSION_STATUS
                self.id = -1
            else:
                self.ipython_display.send_error(u"Cannot delete session {} that is in state '{}'."
                                                .format(session_id, self.status))
            
        except Exception as e:
            self._spark_events.emit_session_deletion_end_event(self.guid, self.kind, session_id, self.status, False,
                                                               e.__class__.__name__, str(e))
            raise
        else:
            self._spark_events.emit_session_deletion_end_event(self.guid, self.kind, session_id, self.status, True, "", "")

    def wait_for_idle(self, seconds_to_wait=None):
        """Wait for session to go to idle status. Sleep meanwhile. Calls done every status_sleep_seconds as
        indicated by the constructor.

        Parameters:
            seconds_to_wait : number of seconds to wait before giving up.
        """
        if seconds_to_wait is None:
            seconds_to_wait = self._wait_for_idle_timeout_seconds

        while True:
            self.refresh_status_and_info()
            if self.status == constants.IDLE_SESSION_STATUS:
                return

            if self.status in constants.FINAL_STATUS:
                error = u"Session {} unexpectedly reached final status '{}'."\
                    .format(self.id, self.status)
                self.logger.error(error)
                raise LivyUnexpectedStatusException(u'{} See logs:\n{}'.format(error, self.get_logs()))

            if seconds_to_wait <= 0.0:
                error = u"Session {} did not reach idle status in time. Current status is {}."\
                    .format(self.id, self.status)
                self.logger.error(error)
                raise LivyClientTimeoutException(error)

            if constants.YARN_RESOURCE_LIMIT_MSG in self.session_info and \
                not self._printed_resource_warning:
                self.ipython_display.send_error(constants.RESOURCE_LIMIT_WARNING\
                                                .format(conf.resource_limit_mitigation_suggestion()))
                self._printed_resource_warning = True

            start_time = time()
            self.logger.debug(u"Session {} in state {}. Sleeping {} seconds."
                              .format(self.id, self.status, self._status_sleep_seconds))
            sleep(self._status_sleep_seconds)
            seconds_to_wait -= time() - start_time

    def sleep(self):
        sleep(self._statement_sleep_seconds)

    # This function will refresh the status and get the logs in a single call.
    # Only the status will be returned as the return value.
    def refresh_status_and_info(self):
        response = self._http_client.get_session(self.id)
        status = response[u'state']
        log_array = response[u'log']

        if status in constants.POSSIBLE_SESSION_STATUS:
            self.status = status
            self.session_info = u"\n".join(log_array)
        else:
           raise LivyUnexpectedStatusException(u"Status '{}' not supported by session.".format(status))

    def _start_heartbeat_thread(self):
        if self._should_heartbeat and self._heartbeat_thread is None:
            refresh_seconds = conf.heartbeat_refresh_seconds()
            retry_seconds = conf.heartbeat_retry_seconds()
            
            if self._user_passed_heartbeat_thread is None:
                self._heartbeat_thread = _HeartbeatThread(self, refresh_seconds, retry_seconds)
            else:
                self._heartbeat_thread = self._user_passed_heartbeat_thread
            
            self._heartbeat_thread.daemon = True
            self._heartbeat_thread.start()

    def _stop_heartbeat_thread(self):
        if self._heartbeat_thread is not None:
            self._heartbeat_thread.stop()
            self._heartbeat_thread = None

    def get_row_html(self, current_session_id):
        return u"""<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td><td>{4}</td><td>{5}</td><td>{6}</td></tr>""".format(
            self.id, self.get_app_id(), self.kind, self.status,
            self.get_html_link(u'Link', self.get_spark_ui_url()), self.get_html_link(u'Link', self.get_driver_log_url()),
            u"" if current_session_id is None or current_session_id != self.id else u"\u2714"
        )

    @staticmethod
    def get_html_link(text, url):
        if url is not None:
            return u"""<a target="_blank" href="{1}">{0}</a>""".format(text, url)
        else:
            return u""
Exemplo n.º 38
0
class LivySession(ObjectWithGuid):
    def __init__(self, http_client, properties, ipython_display,
                 session_id=-1, sql_created=None, spark_events=None):
        super(LivySession, self).__init__()
        assert u"kind" in list(properties.keys())
        kind = properties[u"kind"]
        self.properties = properties
        self.ipython_display = ipython_display

        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

        status_sleep_seconds = conf.status_sleep_seconds()
        statement_sleep_seconds = conf.statement_sleep_seconds()
        wait_for_idle_timeout_seconds = conf.wait_for_idle_timeout_seconds()

        assert status_sleep_seconds > 0
        assert statement_sleep_seconds > 0
        assert wait_for_idle_timeout_seconds > 0
        if session_id == -1 and sql_created is True:
            raise BadUserDataException(u"Cannot indicate sql state without session id.")

        self.logger = SparkLog(u"LivySession")

        kind = kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            raise BadUserDataException(u"Session of kind '{}' not supported. Session must be of kinds {}."
                                       .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

        if session_id == -1:
            self.status = constants.NOT_STARTED_SESSION_STATUS
            sql_created = False
        else:
            self.status = constants.BUSY_SESSION_STATUS

        self._app_id = None
        self._logs = u""
        self._http_client = http_client
        self._status_sleep_seconds = status_sleep_seconds
        self._statement_sleep_seconds = statement_sleep_seconds
        self._wait_for_idle_timeout_seconds = wait_for_idle_timeout_seconds

        self.kind = kind
        self.id = session_id
        self.created_sql_context = sql_created

    def __str__(self):
        return u"Session id: {}\tYARN id: {}\tKind: {}\tState: {}\n\tSpark UI: {}\n\tDriver Log: {}"\
            .format(self.id, self.get_app_id(), self.kind, self.status, self.get_spark_ui_url(), self.get_driver_log_url())

    def start(self, create_sql_context=True):
        """Start the session against actual livy server."""
        self._spark_events.emit_session_creation_start_event(self.guid, self.kind)

        try:
            r = self._http_client.post_session(self.properties)
            self.id = r[u"id"]
            self.status = str(r[u"state"])

            self.ipython_display.writeln(u"Creating SparkContext as 'sc'")
            # We wait for livy_session_startup_timeout_seconds() for the session to start up.
            try:
                self.wait_for_idle(conf.livy_session_startup_timeout_seconds())
            except LivyClientTimeoutException:
                raise LivyClientTimeoutException(u"Session {} did not start up in {} seconds."
                                                 .format(self.id, conf.livy_session_startup_timeout_seconds()))

            if create_sql_context:
                self.create_sql_context()
        except Exception as e:
            self._spark_events.emit_session_creation_end_event(self.guid, self.kind, self.id, self.status,
                                                               False, e.__class__.__name__, str(e))
            raise
        else:
            self._spark_events.emit_session_creation_end_event(self.guid, self.kind, self.id, self.status, True, "", "")

    def create_sql_context(self):
        """Create a sqlContext object on the session. Object will be accessible via variable 'sqlContext'."""
        if self.created_sql_context:
            return
        self.logger.debug(u"Starting '{}' hive session.".format(self.kind))
        self.ipython_display.writeln(u"Creating HiveContext as 'sqlContext'")
        command = self._get_sql_context_creation_command()
        try:
            (success, out) = command.execute(self)
        except LivyClientTimeoutException:
            raise LivyClientTimeoutException(u"Failed to create the SqlContext in time. Timed out after {} seconds."
                                             .format(self._wait_for_idle_timeout_seconds))
        if success:
            self.created_sql_context = True
        else:
            raise FailedToCreateSqlContextException(u"Failed to create the SqlContext.\nError: '{}'".format(out))

    def get_app_id(self):
        if self._app_id is None:
            self._app_id = self._http_client.get_session(self.id).get("appId")
        return self._app_id

    def get_app_info(self):
        appInfo = self._http_client.get_session(self.id).get("appInfo")
        return appInfo if appInfo is not None else {}

    def get_app_info_member(self, member_name):
        return self.get_app_info().get(member_name)

    def get_driver_log_url(self):
        return self.get_app_info_member("driverLogUrl")

    def get_logs(self):
        log_array = self._http_client.get_all_session_logs(self.id)[u'log']
        self._logs = "\n".join(log_array)
        return self._logs

    def get_spark_ui_url(self):
        return self.get_app_info_member("sparkUiUrl")

    @property
    def http_client(self):
        return self._http_client

    @staticmethod
    def is_final_status(status):
        return status in constants.FINAL_STATUS

    def delete(self):
        session_id = self.id
        self._spark_events.emit_session_deletion_start_event(self.guid, self.kind, session_id, self.status)

        try:
            self.logger.debug(u"Deleting session '{}'".format(session_id))

            if self.status != constants.NOT_STARTED_SESSION_STATUS:
                self._http_client.delete_session(session_id)
                self.status = constants.DEAD_SESSION_STATUS
                self.id = -1
            else:
                self.ipython_display.send_error(u"Cannot delete session {} that is in state '{}'."
                                                .format(session_id, self.status))
        except Exception as e:
            self._spark_events.emit_session_deletion_end_event(self.guid, self.kind, session_id, self.status, False,
                                                               e.__class__.__name__, str(e))
            raise
        else:
            self._spark_events.emit_session_deletion_end_event(self.guid, self.kind, session_id, self.status, True, "", "")

    def wait_for_idle(self, seconds_to_wait=None):
        """Wait for session to go to idle status. Sleep meanwhile. Calls done every status_sleep_seconds as
        indicated by the constructor.

        Parameters:
            seconds_to_wait : number of seconds to wait before giving up.
        """
        if seconds_to_wait is None:
            seconds_to_wait = self._wait_for_idle_timeout_seconds

        while True:
            self.refresh_status()
            if self.status == constants.IDLE_SESSION_STATUS:
                return

            if self.status in constants.FINAL_STATUS:
                error = u"Session {} unexpectedly reached final status '{}'."\
                    .format(self.id, self.status)
                self.logger.error(error)
                raise LivyUnexpectedStatusException(u'{} See logs:\n{}'.format(error, self.get_logs()))

            if seconds_to_wait <= 0.0:
                error = u"Session {} did not reach idle status in time. Current status is {}."\
                    .format(self.id, self.status)
                self.logger.error(error)
                raise LivyClientTimeoutException(error)

            start_time = time()
            self.logger.debug(u"Session {} in state {}. Sleeping {} seconds."
                              .format(self.id, self.status, self._status_sleep_seconds))
            sleep(self._status_sleep_seconds)
            seconds_to_wait -= time() - start_time

    def sleep(self):
        sleep(self._statement_sleep_seconds)

    def refresh_status(self):
        status = self._http_client.get_session(self.id)[u'state']

        if status in constants.POSSIBLE_SESSION_STATUS:
            self.status = status
        else:
            raise LivyUnexpectedStatusException(u"Status '{}' not supported by session.".format(status))

        return self.status

    def _get_sql_context_creation_command(self):
        if self.kind == constants.SESSION_KIND_SPARK:
            sql_context_command = u"val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)"
        elif self.kind == constants.SESSION_KIND_PYSPARK:
            sql_context_command = u"from pyspark.sql import HiveContext\nsqlContext = HiveContext(sc)"
        elif self.kind == constants.SESSION_KIND_SPARKR:
            sql_context_command = u"sqlContext <- sparkRHive.init(sc)"
        else:
            raise BadUserDataException(u"Do not know how to create HiveContext in session of kind {}.".format(self.kind))

        return Command(sql_context_command)
Exemplo n.º 39
0
 def __init__(self, ipython_display):
     self.logger = SparkLog(u"SparkController")
     self.ipython_display = ipython_display
     self.session_manager = SessionManager()
Exemplo n.º 40
0
class LivySession(ObjectWithGuid):
    def __init__(self,
                 http_client,
                 properties,
                 ipython_display,
                 session_id=-1,
                 spark_events=None,
                 heartbeat_timeout=0,
                 heartbeat_thread=None):
        super(LivySession, self).__init__()
        assert constants.LIVY_KIND_PARAM in list(properties.keys())
        kind = properties[constants.LIVY_KIND_PARAM]

        should_heartbeat = False
        if heartbeat_timeout > 0:
            should_heartbeat = True
            properties[
                constants.LIVY_HEARTBEAT_TIMEOUT_PARAM] = heartbeat_timeout
        elif constants.LIVY_HEARTBEAT_TIMEOUT_PARAM in list(properties.keys()):
            properties.pop(constants.LIVY_HEARTBEAT_TIMEOUT_PARAM)

        self.properties = properties
        self.ipython_display = ipython_display
        self._should_heartbeat = should_heartbeat
        self._user_passed_heartbeat_thread = heartbeat_thread

        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events

        self._policy = ConfigurableRetryPolicy(
            retry_seconds_to_sleep_list=[0.2, 0.5, 0.5, 1, 1, 2],
            max_retries=5000)
        wait_for_idle_timeout_seconds = conf.wait_for_idle_timeout_seconds()

        assert wait_for_idle_timeout_seconds > 0

        self.logger = SparkLog(u"LivySession")

        kind = kind.lower()
        if kind not in constants.SESSION_KINDS_SUPPORTED:
            raise BadUserDataException(
                u"Session of kind '{}' not supported. Session must be of kinds {}."
                .format(kind, ", ".join(constants.SESSION_KINDS_SUPPORTED)))

        self._app_id = None
        self._user = None
        self._logs = u""
        self._http_client = http_client
        self._wait_for_idle_timeout_seconds = wait_for_idle_timeout_seconds
        self._printed_resource_warning = False

        self.kind = kind
        self.id = session_id
        self.session_info = u""

        self._heartbeat_thread = None
        if session_id == -1:
            self.status = constants.NOT_STARTED_SESSION_STATUS
        else:
            self.status = constants.BUSY_SESSION_STATUS
            self._start_heartbeat_thread()

    def __str__(self):
        return u"Session id: {}\tYARN id: {}\tKind: {}\tState: {}\n\tSpark UI: {}\n\tDriver Log: {}"\
            .format(self.id, self.get_app_id(), self.kind, self.status, self.get_spark_ui_url(), self.get_driver_log_url())

    def start(self):
        """Start the session against actual livy server."""
        self._spark_events.emit_session_creation_start_event(
            self.guid, self.kind)
        self._printed_resource_warning = False

        try:
            r = self._http_client.post_session(self.properties)
            self.id = r[u"id"]
            self.status = str(r[u"state"])

            self.ipython_display.writeln(u"Starting Spark application")

            # Start heartbeat thread to keep Livy interactive session alive.
            self._start_heartbeat_thread()

            # We wait for livy_session_startup_timeout_seconds() for the session to start up.
            try:
                self.wait_for_idle(conf.livy_session_startup_timeout_seconds())
            except LivyClientTimeoutException:
                raise LivyClientTimeoutException(
                    u"Session {} did not start up in {} seconds.".format(
                        self.id, conf.livy_session_startup_timeout_seconds()))

            html = get_sessions_info_html([self], self.id)
            self.ipython_display.html(html)

            command = Command("spark")
            (success, out, mimetype) = command.execute(self)

            if success:
                self.ipython_display.writeln(
                    u"SparkSession available as 'spark'.")
                self.sql_context_variable_name = "spark"
            else:
                command = Command("sqlContext")
                (success, out, mimetype) = command.execute(self)
                if success:
                    self.ipython_display.writeln(
                        u"SparkContext available as 'sc'.")
                    if ("hive" in out.lower()):
                        self.ipython_display.writeln(
                            u"HiveContext available as 'sqlContext'.")
                    else:
                        self.ipython_display.writeln(
                            u"SqlContext available as 'sqlContext'.")
                    self.sql_context_variable_name = "sqlContext"
                else:
                    raise SqlContextNotFoundException(
                        u"Neither SparkSession nor HiveContext/SqlContext is available."
                    )
        except Exception as e:
            self._spark_events.emit_session_creation_end_event(
                self.guid, self.kind, self.id, self.status, False,
                e.__class__.__name__, str(e))
            raise
        else:
            self._spark_events.emit_session_creation_end_event(
                self.guid, self.kind, self.id, self.status, True, "", "")

    def get_app_id(self):
        if self._app_id is None:
            self._app_id = self._http_client.get_session(self.id).get("appId")
        return self._app_id

    def get_app_info(self):
        appInfo = self._http_client.get_session(self.id).get("appInfo")
        return appInfo if appInfo is not None else {}

    def get_app_info_member(self, member_name):
        return self.get_app_info().get(member_name)

    def get_driver_log_url(self):
        return self.get_app_info_member("driverLogUrl")

    def get_logs(self):
        log_array = self._http_client.get_all_session_logs(self.id)[u'log']
        self._logs = "\n".join(log_array)
        return self._logs

    def get_spark_ui_url(self):
        return self.get_app_info_member("sparkUiUrl")

    def get_user(self):
        if self._user is None:
            session = self._http_client.get_session(self.id)
            self._user = session.get("proxyUser", session.get("owner"))
        return self._user

    @property
    def http_client(self):
        return self._http_client

    @property
    def endpoint(self):
        return self._http_client.endpoint

    @staticmethod
    def is_final_status(status):
        return status in constants.FINAL_STATUS

    def delete(self):
        session_id = self.id
        self._spark_events.emit_session_deletion_start_event(
            self.guid, self.kind, session_id, self.status)

        try:
            self.logger.debug(u"Deleting session '{}'".format(session_id))

            if self.status != constants.NOT_STARTED_SESSION_STATUS:
                self._http_client.delete_session(session_id)
                self._stop_heartbeat_thread()
                self.status = constants.DEAD_SESSION_STATUS
                self.id = -1
            else:
                self.ipython_display.send_error(
                    u"Cannot delete session {} that is in state '{}'.".format(
                        session_id, self.status))

        except Exception as e:
            self._spark_events.emit_session_deletion_end_event(
                self.guid, self.kind, session_id, self.status, False,
                e.__class__.__name__, str(e))
            raise
        else:
            self._spark_events.emit_session_deletion_end_event(
                self.guid, self.kind, session_id, self.status, True, "", "")

    def wait_for_idle(self, seconds_to_wait=None):
        """Wait for session to go to idle status. Sleep meanwhile.

        Parameters:
            seconds_to_wait : number of seconds to wait before giving up.
        """
        if seconds_to_wait is None:
            seconds_to_wait = self._wait_for_idle_timeout_seconds

        retries = 1
        while True:
            self.refresh_status_and_info()
            if self.status == constants.IDLE_SESSION_STATUS:
                return

            if self.status in constants.FINAL_STATUS:
                error = u"Session {} unexpectedly reached final status '{}'."\
                    .format(self.id, self.status)
                self.logger.error(error)
                raise LivyUnexpectedStatusException(u'{} See logs:\n{}'.format(
                    error, self.get_logs()))

            if seconds_to_wait <= 0.0:
                error = u"Session {} did not reach idle status in time. Current status is {}."\
                    .format(self.id, self.status)
                self.logger.error(error)
                raise LivyClientTimeoutException(error)

            if constants.YARN_RESOURCE_LIMIT_MSG in self.session_info and \
                not self._printed_resource_warning:
                self.ipython_display.send_error(constants.RESOURCE_LIMIT_WARNING\
                                                .format(conf.resource_limit_mitigation_suggestion()))
                self._printed_resource_warning = True

            start_time = time()
            sleep_time = self._policy.seconds_to_sleep(retries)
            retries += 1

            self.logger.debug(
                u"Session {} in state {}. Sleeping {} seconds.".format(
                    self.id, self.status, sleep_time))
            sleep(sleep_time)
            seconds_to_wait -= time() - start_time

    def sleep(self, retries):
        sleep(self._policy.seconds_to_sleep(retries))

    # This function will refresh the status and get the logs in a single call.
    # Only the status will be returned as the return value.
    def refresh_status_and_info(self):
        response = self._http_client.get_session(self.id)
        status = response[u'state']
        log_array = response[u'log']

        if status in constants.POSSIBLE_SESSION_STATUS:
            self.status = status
            self.session_info = u"\n".join(log_array)
        else:
            raise LivyUnexpectedStatusException(
                u"Status '{}' not supported by session.".format(status))

    def _start_heartbeat_thread(self):
        if self._should_heartbeat and self._heartbeat_thread is None:
            refresh_seconds = conf.heartbeat_refresh_seconds()
            retry_seconds = conf.heartbeat_retry_seconds()

            if self._user_passed_heartbeat_thread is None:
                self._heartbeat_thread = _HeartbeatThread(
                    self, refresh_seconds, retry_seconds)
            else:
                self._heartbeat_thread = self._user_passed_heartbeat_thread

            self._heartbeat_thread.daemon = True
            self._heartbeat_thread.start()

    def _stop_heartbeat_thread(self):
        if self._heartbeat_thread is not None:
            self._heartbeat_thread.stop()
            self._heartbeat_thread = None

    def get_row_html(self, current_session_id):
        return u"""<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td><td>{4}</td><td>{5}</td><td>{6}</td><td>{7}</td></tr>""".format(
            self.id, self.get_app_id(), self.kind, self.status,
            self.get_html_link(u'Link', self.get_spark_ui_url()),
            self.get_html_link(u'Link', self.get_driver_log_url()),
            self.get_user(), u"" if current_session_id is None
            or current_session_id != self.id else u"\u2714")

    @staticmethod
    def get_html_link(text, url):
        if url is not None:
            return u"""<a target="_blank" href="{1}">{0}</a>""".format(
                text, url)
        else:
            return u""
Exemplo n.º 41
0
    def __init__(self):
        self.logger = SparkLog(u"SessionManager")

        self._sessions = dict()
Exemplo n.º 42
0
class ReliableHttpClient(object):
    """Http client that is reliable in its requests. Uses requests library."""
    def __init__(self, endpoint, headers, retry_policy):
        self._endpoint = endpoint
        self._headers = headers
        self._retry_policy = retry_policy
        self.logger = SparkLog(u"ReliableHttpClient")

        self.verify_ssl = not conf.ignore_ssl_errors()
        if not self.verify_ssl:
            self.logger.debug(
                u"ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks."
            )
            requests.packages.urllib3.disable_warnings()

    def compose_url(self, relative_url):
        r_u = "/{}".format(relative_url.rstrip(u"/").lstrip(u"/"))
        return self._endpoint.url + r_u

    def get(self, relative_url, accepted_status_codes):
        """Sends a get request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes,
                                  requests.get)

    def post(self, relative_url, accepted_status_codes, data):
        """Sends a post request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes,
                                  requests.post, data)

    def delete(self, relative_url, accepted_status_codes):
        """Sends a delete request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes,
                                  requests.delete)

    def _send_request(self,
                      relative_url,
                      accepted_status_codes,
                      function,
                      data=None):
        return self._send_request_helper(self.compose_url(relative_url),
                                         accepted_status_codes, function, data,
                                         0)

    def _send_request_helper(self, url, accepted_status_codes, function, data,
                             retry_count):
        while True:
            try:
                if not self._endpoint.authenticate:
                    if data is None:
                        r = function(url,
                                     headers=self._headers,
                                     verify=self.verify_ssl)
                    else:
                        r = function(url,
                                     headers=self._headers,
                                     data=json.dumps(data),
                                     verify=self.verify_ssl)
                else:
                    if data is None:
                        r = function(url,
                                     headers=self._headers,
                                     auth=(self._endpoint.username,
                                           self._endpoint.password),
                                     verify=self.verify_ssl)
                    else:
                        r = function(url,
                                     headers=self._headers,
                                     auth=(self._endpoint.username,
                                           self._endpoint.password),
                                     data=json.dumps(data),
                                     verify=self.verify_ssl)
            except requests.exceptions.RequestException as e:
                error = True
                r = None
                status = None

                self.logger.error(u"Request to '{}' failed with '{}'".format(
                    url, e))
            else:
                error = False
                status = r.status_code

            if error or status not in accepted_status_codes:
                if self._retry_policy.should_retry(status, error, retry_count):
                    sleep(self._retry_policy.seconds_to_sleep(retry_count))
                    retry_count += 1
                    continue
                else:
                    raise HttpClientException(
                        u"Invalid status code '{}' or error '{}' from {}".
                        format(status, error, url))
            return r
Exemplo n.º 43
0
class ReliableHttpClient(object):
    """Http client that is reliable in its requests. Uses requests library."""

    def __init__(self, endpoint, headers, retry_policy):
        self._endpoint = endpoint
        self._headers = headers
        self._retry_policy = retry_policy
        self.logger = SparkLog(u"ReliableHttpClient")

        self.verify_ssl = not conf.ignore_ssl_errors()
        if not self.verify_ssl:
            self.logger.debug(u"ATTENTION: Will ignore SSL errors. This might render you vulnerable to attacks.")
            requests.packages.urllib3.disable_warnings()

    def compose_url(self, relative_url):
        r_u = "/{}".format(relative_url.rstrip(u"/").lstrip(u"/"))
        return self._endpoint.url + r_u

    def get(self, relative_url, accepted_status_codes):
        """Sends a get request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes, requests.get)

    def post(self, relative_url, accepted_status_codes, data):
        """Sends a post request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes, requests.post, data)

    def delete(self, relative_url, accepted_status_codes):
        """Sends a delete request. Returns a response."""
        return self._send_request(relative_url, accepted_status_codes, requests.delete)

    def _send_request(self, relative_url, accepted_status_codes, function, data=None):
        return self._send_request_helper(self.compose_url(relative_url), accepted_status_codes, function, data, 0)

    def _send_request_helper(self, url, accepted_status_codes, function, data, retry_count):
        while True:
            try:
                if not self._endpoint.authenticate:
                    if data is None:
                        r = function(url, headers=self._headers, verify=self.verify_ssl)
                    else:
                        r = function(url, headers=self._headers, data=json.dumps(data), verify=self.verify_ssl)
                else:
                    if data is None:
                        r = function(url, headers=self._headers, auth=(self._endpoint.username, self._endpoint.password),
                                     verify=self.verify_ssl)
                    else:
                        r = function(url, headers=self._headers, auth=(self._endpoint.username, self._endpoint.password),
                                     data=json.dumps(data), verify=self.verify_ssl)
            except requests.exceptions.RequestException as e:
                error = True
                r = None
                status = None

                self.logger.error(u"Request to '{}' failed with '{}'".format(url, e))
            else:
                error = False
                status = r.status_code

            if error or status not in accepted_status_codes:
                if self._retry_policy.should_retry(status, error, retry_count):
                    sleep(self._retry_policy.seconds_to_sleep(retry_count))
                    retry_count += 1
                    continue
                else:
                    raise HttpClientException(u"Invalid status code '{}' or error '{}' from {}"
                                              .format(status, error, url))
            return r
Exemplo n.º 44
0
 def __init__(self, ipython_display):
     self.logger = SparkLog(u"SparkController")
     self.ipython_display = ipython_display
     self.session_manager = SessionManager()
Exemplo n.º 45
0
class SparkMagicBase(Magics):
    def __init__(self, shell, data=None, spark_events=None):
        # You must call the parent constructor
        super(SparkMagicBase, self).__init__(shell)

        self.logger = SparkLog(u"SparkMagics")
        self.ipython_display = IpythonDisplay()
        self.spark_controller = SparkController(self.ipython_display)

        self.logger.debug("Initialized spark magics.")

        if spark_events is None:
            spark_events = SparkEvents()
        spark_events.emit_library_loaded_event()

    def execute_final(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):
        (success,
         out) = self.spark_controller.run_command(Command(cell), session_name)
        if not success:
            self.ipython_display.send_error(out)
        else:
            self.ipython_display.write(out)
            if output_var is not None:
                spark_store_command = self._spark_store_command(
                    output_var, samplemethod, maxrows, samplefraction, coerce)
                df = self.spark_controller.run_command(spark_store_command,
                                                       session_name)
                self.shell.user_ns[output_var] = df

    def execute_spark(self, cell, output_var, samplemethod, maxrows,
                      samplefraction, session_name, coerce):

        if "lagom as" in cell:
            self.ipython_display.send_error(
                "You are not allowed to do the following: 'import maggy.experiment.lagom as ...'. Please, just use 'import maggy.experiment as experiment' (or something else)"
            )
            raise
        elif ".lagom" in cell:
            client = Client(self.spark_controller, self.session_name, 5,
                            self.ipython_display)
            try:
                client.start_heartbeat()
                if DEBUG:
                    self.ipython_display.writeln("Started heartbeating...")
                self.execute_final(cell, output_var, samplemethod, maxrows,
                                   samplefraction, session_name, coerce)
            except:
                raise
            finally:
                # 4. Kill thread before leaving current scope
                client.stop()
                try:
                    client.close()
                except:
                    if DEBUG:
                        print("Socket already closed by maggy server.")
                    pass
        else:
            self.execute_final(cell, output_var, samplemethod, maxrows,
                               samplefraction, session_name, coerce)

    @staticmethod
    def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                             coerce):
        return SparkStoreCommand(output_var,
                                 samplemethod,
                                 maxrows,
                                 samplefraction,
                                 coerce=coerce)

    def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction,
                         session, output_var, quiet, coerce):
        sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction,
                                  coerce)
        df = self.spark_controller.run_sqlquery(sqlquery, session)
        if output_var is not None:
            self.shell.user_ns[output_var] = df
        if quiet:
            return None
        else:
            return df

    @staticmethod
    def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce):
        return SQLQuery(cell,
                        samplemethod,
                        maxrows,
                        samplefraction,
                        coerce=coerce)

    def _print_endpoint_info(self, info_sessions, current_session_id):
        if info_sessions:
            info_sessions = sorted(info_sessions, key=lambda s: s.id)
            html = get_sessions_info_html(info_sessions, current_session_id)
            self.ipython_display.html(html)
        else:
            self.ipython_display.html(u'No active sessions.')