def test_add_sessions_command_parses():
    """`add` subcommand builds an Endpoint from the parsed args and registers the session."""
    # Case 1: python kernel, basic auth, no skip flag.
    add_sessions_mock = MagicMock()
    spark_controller.add_session = add_sessions_mock
    line = " ".join([
        "add",
        "-s name",
        "-l python",
        "-u http://url.com -t {} -a sdf -p w".format(AUTH_BASIC),
    ])
    magic.spark(line)
    args = parse_argstring_or_throw(RemoteSparkMagics.spark, line)
    add_sessions_mock.assert_called_once_with(
        "name", Endpoint("http://url.com", initialize_auth(args)),
        False, {"kind": "pyspark"})

    # Case 2: scala kernel with the skip (-k) flag and no auth on the line.
    add_sessions_mock = MagicMock()
    spark_controller.add_session = add_sessions_mock
    line = " ".join(["add", "-s name", "-l scala",
                     "--url http://location:port", "-k"])
    magic.spark(line)
    args = parse_argstring_or_throw(RemoteSparkMagics.spark, line)
    args.auth = NO_AUTH
    add_sessions_mock.assert_called_once_with(
        "name", Endpoint("http://location:port", initialize_auth(args)),
        True, {"kind": "spark"})
def logs(self, line, cell="", local_ns=None):
    """Print the Livy logs for the running session, or a placeholder if none exists."""
    parse_argstring_or_throw(self.logs, line)
    self._assure_cell_body_is_empty(KernelMagics.logs.__name__, cell)
    if not self.session_started:
        self.ipython_display.write(u"No logs yet.")
        return
    self.ipython_display.write(self.spark_controller.get_logs())
def logs(self, line, cell="", local_ns=None):
    """Display the current session's Livy logs; writes a stub message when no session is up."""
    parse_argstring_or_throw(self.logs, line)
    self._assure_cell_body_is_empty(KernelMagics.logs.__name__, cell)
    if self.session_started:
        self.ipython_display.write(self.spark_controller.get_logs())
    else:
        self.ipython_display.write(u"No logs yet.")
def spark(self, line, cell="", local_ns=None):
    """Run the cell as Spark code in the kernel's session and surface the result or error."""
    parse_argstring_or_throw(self.spark, line)
    if not self._do_not_call_start_session(u""):
        # Session could not be started; nothing to execute.
        return None
    success, out = self.spark_controller.run_command(Command(cell))
    if success:
        self.ipython_display.write(out)
    else:
        self.ipython_display.send_error(out)
def spark(self, line, cell="", local_ns=None):
    """Execute the cell against the kernel session; write output or an error message."""
    parse_argstring_or_throw(self.spark, line)
    if self._do_not_call_start_session(u""):
        success, out = self.spark_controller.run_command(Command(cell))
        display = self.ipython_display.write if success else self.ipython_display.send_error
        display(out)
    else:
        return None
def info(self, line, cell=u"", local_ns=None):
    """Show the session configs and all sessions on the kernel's endpoint."""
    parse_argstring_or_throw(self.info, line)
    self._assure_cell_body_is_empty(KernelMagics.info.__name__, cell)
    # The kernel's own session id is highlighted when a session exists.
    current_session_id = (
        self.spark_controller.get_session_id_for_client(self.session_name)
        if self.session_started else None
    )
    self.ipython_display.html(
        u"Current session configs: <tt>{}</tt><br>".format(
            conf.get_session_properties(self.language)))
    info_sessions = self.spark_controller.get_all_sessions_endpoint(self.endpoint)
    self._print_endpoint_info(info_sessions, current_session_id)
def info(self, line, cell=u"", local_ns=None):
    """Display current session configuration and endpoint session listing."""
    parse_argstring_or_throw(self.info, line)
    self._assure_cell_body_is_empty(KernelMagics.info.__name__, cell)
    current_session_id = None
    if self.session_started:
        current_session_id = self.spark_controller.get_session_id_for_client(
            self.session_name)
    properties = conf.get_session_properties(self.language)
    self.ipython_display.html(
        u"Current session configs: <tt>{}</tt><br>".format(properties))
    sessions = self.spark_controller.get_all_sessions_endpoint(self.endpoint)
    self._print_endpoint_info(sessions, current_session_id)
def test_parse_argstring_or_throw():
    """UsageError is rewrapped as BadUserDataException; other exceptions propagate as-is."""
    usage_parser = MagicMock(side_effect=UsageError('OOGABOOGABOOGA'))
    try:
        parse_argstring_or_throw(MagicMock(), MagicMock(),
                                 parse_argstring=usage_parser)
        assert False
    except BadUserDataException as e:
        assert_equals(str(e), str(usage_parser.side_effect))

    other_parser = MagicMock(side_effect=ValueError('AN UNKNOWN ERROR HAPPENED'))
    try:
        parse_argstring_or_throw(MagicMock(), MagicMock(),
                                 parse_argstring=other_parser)
        assert False
    except ValueError as e:
        # The very same exception object must surface, untouched.
        assert_is(e, other_parser.side_effect)
def test_parse_argstring_or_throw():
    """Only UsageError is translated; unrelated errors pass through unchanged."""
    parser = MagicMock(side_effect=UsageError('OOGABOOGABOOGA'))
    raised = None
    try:
        parse_argstring_or_throw(MagicMock(), MagicMock(), parse_argstring=parser)
    except BadUserDataException as e:
        raised = e
    assert raised is not None
    assert_equals(str(raised), str(parser.side_effect))

    parser = MagicMock(side_effect=ValueError('AN UNKNOWN ERROR HAPPENED'))
    raised = None
    try:
        parse_argstring_or_throw(MagicMock(), MagicMock(), parse_argstring=parser)
    except ValueError as e:
        raised = e
    assert raised is not None
    assert_is(raised, parser.side_effect)
def test_cleanup_endpoint_command_parses():
    """`cleanup -u` builds an Endpoint from args (with and without basic auth)."""
    cleanup_mock = MagicMock()
    spark_controller.cleanup_endpoint = cleanup_mock

    # No-auth form.
    line = "cleanup -u endp"
    magic.spark(line)
    args = parse_argstring_or_throw(RemoteSparkMagics.spark, line)
    args.auth = NO_AUTH
    cleanup_mock.assert_called_once_with(Endpoint("endp", initialize_auth(args)))

    # Basic-auth form.
    line = "cleanup -u endp -a user -p passw -t {}".format(AUTH_BASIC)
    magic.spark(line)
    args = parse_argstring_or_throw(RemoteSparkMagics.spark, line)
    cleanup_mock.assert_called_with(Endpoint("endp", initialize_auth(args)))
def sql(self, line, cell="", local_ns=None):
    """Run the cell as a SparkSQL query in the kernel session; no-op if no session starts."""
    if not self._do_not_call_start_session(""):
        return
    args = parse_argstring_or_throw(self.sql, line)
    return self.execute_sqlquery(cell, args.samplemethod, args.maxrows,
                                 args.samplefraction, None, args.output,
                                 args.quiet)
def sql(self, line, cell="", local_ns=None):
    """Execute the cell as SQL against the current session's SQL context."""
    if self._do_not_call_start_session(""):
        parsed = parse_argstring_or_throw(self.sql, line)
        return self.execute_sqlquery(
            cell,
            parsed.samplemethod,
            parsed.maxrows,
            parsed.samplefraction,
            None,
            parsed.output,
            parsed.quiet,
        )
    return
def spark(self, line, cell="", local_ns=None):
    """Execute the cell as Spark code in the current session; no-op when none is available."""
    if not self._do_not_call_start_session(u""):
        return
    args = parse_argstring_or_throw(self.spark, line)
    self.execute_spark(cell, args.output, args.samplemethod,
                       args.maxrows, args.samplefraction, None)
def delete(self, line, cell="", local_ns=None):
    """Delete a Livy session by id (-s) on the kernel's endpoint.

    Requires the -f force flag, and refuses to delete the session that this
    kernel itself is using.
    """
    self._assure_cell_body_is_empty(KernelMagics.delete.__name__, cell)
    args = parse_argstring_or_throw(self.delete, line)
    session = args.session
    if session is None:
        self.ipython_display.send_error(
            u'You must provide a session ID (-s argument).')
        return
    if not args.force:
        # Guard clause replaces the original trailing else-branch.
        self.ipython_display.send_error(
            u"Include the -f parameter if you understand that all statements executed "
            u"in this session will be lost.")
        return
    # Renamed from `id`, which shadowed the builtin.
    kernel_session_id = self.spark_controller.get_session_id_for_client(
        self.session_name)
    if session == kernel_session_id:
        self.ipython_display.send_error(
            u"Cannot delete this kernel's session ({}). Specify a different session,"
            u" shutdown the kernel to delete this session, or run %cleanup to "
            u"delete all sessions for this endpoint.".format(kernel_session_id))
        return
    self.spark_controller.delete_session_by_id(self.endpoint, session)
def _do_not_call_change_endpoint(self, line, cell="", local_ns=None):
    """Repoint the kernel at a new Livy endpoint; rejected once a session is running."""
    args = parse_argstring_or_throw(self._do_not_call_change_endpoint, line)
    if self.session_started:
        raise BadUserDataException(
            u"Cannot change the endpoint if a session has been started.")
    self.endpoint = Endpoint(args.url, initialize_auth(args=args))
def spark(self, line, cell="", local_ns=None):
    """Execute the cell as Spark code, honoring the -e/--coerce flag."""
    if self._do_not_call_start_session(u""):
        args = parse_argstring_or_throw(self.spark, line)
        self.execute_spark(cell, args.output, args.samplemethod,
                           args.maxrows, args.samplefraction, None,
                           get_coerce_value(args.coerce))
def _do_not_call_change_endpoint(self, line, cell="", local_ns=None):
    """Replace the kernel endpoint with one built from -s/-a/-p args; rejected mid-session."""
    args = parse_argstring_or_throw(self._do_not_call_change_endpoint, line)
    if self.session_started:
        raise BadUserDataException(
            u"Cannot change the endpoint if a session has been started.")
    self.endpoint = Endpoint(args.server, args.username, args.password)
def _do_not_call_init_livy_session(self, line, cell="", local_ns=None):
    """Start a Livy session for the requested language (matched case-insensitively)."""
    args = parse_argstring_or_throw(self._do_not_call_init_livy_session, line)
    language = args.language.lower()
    if language in LANGS_SUPPORTED:
        self.init_livy_session(language)
    else:
        self.ipython_display.send_error(
            u"'{}' language not supported in kernel magics.".format(language))
def send_to_spark(self, line, cell=u"", local_ns=None):
    """Send a local variable (-i) to the Spark cluster; -i is mandatory."""
    self._assure_cell_body_is_empty(KernelMagics.send_to_spark.__name__, cell)
    args = parse_argstring_or_throw(self.send_to_spark, line)
    if not args.input:
        raise BadUserDataException("-i param not provided.")
    if not self._do_not_call_start_session(""):
        return
    self.do_send_to_spark(cell, args.input, args.vartype, args.varname,
                          args.maxrows, None)
def _do_not_call_change_endpoint(self, line, cell="", local_ns=None):
    """Swap the kernel's Livy endpoint; only allowed before any session starts."""
    args = parse_argstring_or_throw(self._do_not_call_change_endpoint, line)
    server, username, password = args.server, args.username, args.password
    if self.session_started:
        error = u"Cannot change the endpoint if a session has been started."
        raise BadUserDataException(error)
    self.endpoint = Endpoint(server, username, password)
def cleanup(self, line, cell="", local_ns=None):
    """Delete every session on this endpoint (including the kernel's); requires -f."""
    self._assure_cell_body_is_empty(KernelMagics.cleanup.__name__, cell)
    args = parse_argstring_or_throw(self.cleanup, line)
    if not args.force:
        self.ipython_display.send_error(
            u"When you clean up the endpoint, all sessions will be lost, including the "
            u"one used for this notebook. Include the -f parameter if that's your "
            u"intention.")
        return
    self._do_not_call_delete_session(u"")
    self.spark_controller.cleanup_endpoint(self.endpoint)
def cleanup(self, line, cell="", local_ns=None):
    """Tear down all sessions on the current endpoint; the -f flag is mandatory."""
    self._assure_cell_body_is_empty(KernelMagics.cleanup.__name__, cell)
    args = parse_argstring_or_throw(self.cleanup, line)
    if args.force:
        # Drop this kernel's own session first, then sweep the endpoint.
        self._do_not_call_delete_session(u"")
        self.spark_controller.cleanup_endpoint(self.endpoint)
    else:
        self.ipython_display.send_error(
            u"When you clean up the endpoint, all sessions will be lost, including the "
            u"one used for this notebook. Include the -f parameter if that's your "
            u"intention.")
        return
def test_add_sessions_command_raises_google_no_account():
    """Google auth without an account argument: initialize_auth is expected to raise."""
    with patch('google.auth.default', return_value=(creds, 'project'),
               autospec=True):
        add_sessions_mock = MagicMock()
        spark_controller.add_session = add_sessions_mock
        line = " ".join(["add", "-s name", "-l python",
                         "-u http://url.com -t Google"])
        magic.spark(line)
        args = parse_argstring_or_throw(DataprocMagics.spark, line)
        initialize_auth(args)
def _do_not_call_change_language(self, line, cell="", local_ns=None):
    """Switch the kernel language; only valid before a session has started."""
    args = parse_argstring_or_throw(self._do_not_call_change_language, line)
    requested = args.language.lower()
    if requested not in LANGS_SUPPORTED:
        self.ipython_display.send_error(
            u"'{}' language not supported in kernel magics.".format(requested))
    elif self.session_started:
        self.ipython_display.send_error(
            u"Cannot change the language if a session has been started.")
    else:
        self.language = requested
        self.refresh_configuration()
def _do_not_call_change_language(self, line, cell="", local_ns=None):
    """Change the kernel's language, guarding against unsupported values and live sessions."""
    args = parse_argstring_or_throw(self._do_not_call_change_language, line)
    language = args.language.lower()
    if language not in LANGS_SUPPORTED:
        message = u"'{}' language not supported in kernel magics.".format(language)
        self.ipython_display.send_error(message)
        return
    if self.session_started:
        self.ipython_display.send_error(
            u"Cannot change the language if a session has been started.")
        return
    self.language = language
    self.refresh_configuration()
def test_delete_sessions_command_parses():
    """`delete` dispatches by name, or by endpoint URL plus session id."""
    # Delete by session name.
    delete_by_name_mock = MagicMock()
    spark_controller.delete_session_by_name = delete_by_name_mock
    magic.spark("delete -s name")
    delete_by_name_mock.assert_called_once_with("name")

    # Delete by endpoint URL + id, with basic auth.
    command = "delete -u URL -t {} -a username -p password -i 4".format(AUTH_BASIC)
    delete_by_id_mock = MagicMock()
    spark_controller.delete_session_by_id = delete_by_id_mock
    magic.spark(command)
    args = parse_argstring_or_throw(RemoteSparkMagics.spark, command)
    delete_by_id_mock.assert_called_once_with(
        Endpoint("URL", initialize_auth(args)), 4)
def test_add_sessions_command_parses_kerberos():
    """`add` with Kerberos auth registers the session and exposes the URL on the auth."""
    add_sessions_mock = MagicMock()
    spark_controller.add_session = add_sessions_mock
    line = " ".join(["add", "-s name", "-l python",
                     "-u http://url.com -t {}".format('Kerberos')])
    magic.spark(line)
    args = parse_argstring_or_throw(RemoteSparkMagics.spark, line)
    auth_instance = initialize_auth(args)
    add_sessions_mock.assert_called_once_with(
        "name", Endpoint("http://url.com", initialize_auth(args)),
        False, {"kind": "pyspark"})
    assert_equals(auth_instance.url, "http://url.com")
def test_add_sessions_command_exception():
    """A BadUserDataException from add_session is reported through ipython_display."""
    add_sessions_mock = MagicMock(side_effect=BadUserDataException('hehe'))
    spark_controller.add_session = add_sessions_mock
    line = " ".join(["add", "-s name", "-l python",
                     "-u http://url.com -t {} -a sdf -p w".format(AUTH_BASIC)])
    magic.spark(line)
    args = parse_argstring_or_throw(RemoteSparkMagics.spark, line)
    add_sessions_mock.assert_called_once_with(
        "name", Endpoint("http://url.com", initialize_auth(args)),
        False, {"kind": "pyspark"})
    ipython_display.send_error.assert_called_once_with(
        EXPECTED_ERROR_MSG.format(add_sessions_mock.side_effect))
def _do_not_call_change_endpoint(self, line, cell="", local_ns=None):
    """Rebuild the kernel endpoint from args, including Kerberos-specific options."""
    args = parse_argstring_or_throw(self._do_not_call_change_endpoint, line)
    if self.session_started:
        raise BadUserDataException(
            u"Cannot change the endpoint if a session has been started.")
    self.endpoint = Endpoint(args.server, args.auth,
                             args.username, args.password,
                             krb_mutual_auth=args.krb_mutual_auth,
                             krb_host_override=args.krb_host_override)
def configure(self, line, cell="", local_ns=None):
    """Override session-creation settings from the cell's JSON body.

    If a session is already running, -f is required; the session is then
    dropped and recreated with the new settings.
    """
    try:
        dictionary = json.loads(cell)
    except ValueError:
        self.ipython_display.send_error(
            u"Could not parse JSON object from input '{}'".format(cell))
        return
    args = parse_argstring_or_throw(self.configure, line)
    if not self.session_started:
        self._override_session_settings(dictionary)
    elif args.force:
        # Recreate the live session under the new configuration.
        self._do_not_call_delete_session(u"")
        self._override_session_settings(dictionary)
        self._do_not_call_start_session(u"")
    else:
        self.ipython_display.send_error(
            u"A session has already been started. If you intend to recreate the "
            u"session with new configurations, please include the -f argument.")
        return
    self.info(u"")
def configure(self, line, cell="", local_ns=None):
    """Apply JSON session settings from the cell; -f forces recreation of a live session."""
    try:
        settings = json.loads(cell)
    except ValueError:
        self.ipython_display.send_error(
            u"Could not parse JSON object from input '{}'".format(cell))
        return
    args = parse_argstring_or_throw(self.configure, line)
    if self.session_started and not args.force:
        self.ipython_display.send_error(
            u"A session has already been started. If you intend to recreate the "
            u"session with new configurations, please include the -f argument.")
        return
    if self.session_started:
        self._do_not_call_delete_session(u"")
        self._override_session_settings(settings)
        self._do_not_call_start_session(u"")
    else:
        self._override_session_settings(settings)
    self.info(u"")
def test_add_sessions_command_parses_google_default_credentials():
    """`add` with Google auth and default credentials builds a GoogleAuth endpoint."""
    with patch('google.auth.default', return_value=(creds, 'project'),
               autospec=True):
        add_sessions_mock = MagicMock()
        spark_controller.add_session = add_sessions_mock
        line = " ".join(["add", "-s name", "-l python",
                         "-u http://url.com -t Google",
                         "-g default-credentials"])
        magic.spark(line)
        args = parse_argstring_or_throw(DataprocMagics.spark, line)
        auth_instance = initialize_auth(args)
        add_sessions_mock.assert_called_once_with(
            "name", Endpoint("http://url.com", initialize_auth(args)),
            False, {"kind": "pyspark"})
        assert_equals(auth_instance.url, "http://url.com")
        # Fix: the isinstance result was previously discarded, so the type
        # check never actually asserted anything.
        assert isinstance(auth_instance, GoogleAuth)
        assert_equals(auth_instance.active_credentials, 'default-credentials')
def delete(self, line, cell="", local_ns=None):
    """Delete a Livy session by id (-s) on the kernel's endpoint; -f is required.

    Refuses to delete the session this kernel itself is attached to.
    """
    self._assure_cell_body_is_empty(KernelMagics.delete.__name__, cell)
    args = parse_argstring_or_throw(self.delete, line)
    session = args.session
    if session is None:
        self.ipython_display.send_error(
            u'You must provide a session ID (-s argument).')
        return
    if not args.force:
        self.ipython_display.send_error(
            u"Include the -f parameter if you understand that all statements executed "
            u"in this session will be lost.")
        return
    # Renamed from `id` to avoid shadowing the builtin.
    kernel_session_id = self.spark_controller.get_session_id_for_client(
        self.session_name)
    if session == kernel_session_id:
        self.ipython_display.send_error(
            u"Cannot delete this kernel's session ({}). Specify a different session,"
            u" shutdown the kernel to delete this session, or run %cleanup to "
            u"delete all sessions for this endpoint.".format(kernel_session_id))
        return
    self.spark_controller.delete_session_by_id(self.endpoint, session)
def test_add_sessions_command_parses_google_user_credentials():
    """`add` with Google auth and a named user account builds a GoogleAuth endpoint."""
    with patch('sparkmagic.auth.google.list_credentialed_user_accounts',
               return_value=mock_credentialed_accounts_valid_accounts), \
         patch('subprocess.check_output', return_value=AUTH_DESCRIBE_USER):
        add_sessions_mock = MagicMock()
        spark_controller.add_session = add_sessions_mock
        line = " ".join(["add", "-s name", "-l python",
                         "-u http://url.com -t Google",
                         "-g [email protected]"])
        magic.spark(line)
        args = parse_argstring_or_throw(DataprocMagics.spark, line)
        auth_instance = initialize_auth(args)
        add_sessions_mock.assert_called_once_with(
            "name", Endpoint("http://url.com", initialize_auth(args)),
            False, {"kind": "pyspark"})
        assert_equals(auth_instance.url, "http://url.com")
        # Fix: the isinstance result was previously discarded, so the type
        # check never actually asserted anything.
        assert isinstance(auth_instance, GoogleAuth)
        assert_equals(auth_instance.active_credentials, '*****@*****.**')
def test_add_sessions_command_extra_properties():
    """Properties set via `config` are merged into the session kind on `add`."""
    conf.override_all({})
    magic.spark("config", "{\"extra\": \"yes\"}")
    assert conf.session_configs() == {"extra": "yes"}

    add_sessions_mock = MagicMock()
    spark_controller.add_session = add_sessions_mock
    line = " ".join(["add", "-s name", "-l scala",
                     "-u http://livyendpoint.com"])
    magic.spark(line)
    args = parse_argstring_or_throw(RemoteSparkMagics.spark, line)
    args.auth = NO_AUTH
    add_sessions_mock.assert_called_once_with(
        "name", Endpoint("http://livyendpoint.com", initialize_auth(args)),
        False, {"kind": "spark", "extra": "yes"})
    # Leave global config clean for other tests.
    conf.override_all({})
def spark(self, line, cell="", local_ns=None):
    """Magic to execute spark remotely.

    This magic allows you to create a Livy Scala or Python session against
    a Livy endpoint. Every session can be used to execute either Spark code
    or SparkSQL code by executing against the SQL context in the session.
    When the SQL context is used, the result will be a Pandas dataframe of a
    sample of the results.

    If invoked with no subcommand, the cell will be executed against the
    specified session.

    Subcommands
    -----------
    info
        Display the available Livy sessions and other configurations for
        sessions.
    add
        Add a Livy session given a session name (-s), language (-l), and
        endpoint credentials. The -k argument, if present, will skip adding
        this session if it already exists.
        e.g. `%spark add -s test -l python -u https://sparkcluster.net/livy -t Kerberos -a u -p -k`
    config
        Override the livy session properties sent to Livy on session creation.
        All session creations will contain these config settings from then on.
        Expected value is a JSON key-value string to be sent as part of the
        Request Body for the POST /sessions endpoint in Livy.
        e.g. `%%spark config`
        `{"driverMemory":"1000M", "executorCores":4}`
    run
        Run Spark code against a session.
        e.g. `%%spark -s testsession` will execute the cell code against the
        testsession previously created
        e.g. `%%spark -s testsession -c sql` will execute the SQL code against
        the testsession previously created
        e.g. `%%spark -s testsession -c sql -o my_var` will execute the SQL
        code against the testsession previously created and store the pandas
        dataframe created in the my_var variable in the Python environment.
    logs
        Returns the logs for a given session.
        e.g. `%spark logs -s testsession` will return the logs for the
        testsession previously created
    delete
        Delete a Livy session.
        e.g. `%spark delete -s defaultlivy`
    cleanup
        Delete all Livy sessions created by the notebook. No arguments
        required.
        e.g. `%spark cleanup`
    """
    usage = "Please look at usage of %spark by executing `%spark?`."
    user_input = line
    args = parse_argstring_or_throw(self.spark, user_input)
    # The first positional token selects the subcommand (empty => "run").
    subcommand = args.command[0].lower()
    # Fall back to a configured auth value when none was given on the line.
    if args.auth is None:
        args.auth = conf.get_auth_value(args.user, args.password)
    else:
        # NOTE(review): self-assignment is a no-op; candidate for removal.
        args.auth = args.auth
    # info
    if subcommand == "info":
        if args.url is not None:
            endpoint = Endpoint(args.url, args.auth, args.user, args.password)
            info_sessions = self.spark_controller.get_all_sessions_endpoint_info(endpoint)
            self._print_endpoint_info(info_sessions)
        else:
            self._print_local_info()
    # config
    elif subcommand == "config":
        # Cell body is a JSON object merged into future session creations.
        conf.override(conf.session_configs.__name__, json.loads(cell))
    #conf file
    elif subcommand == "encoded":
        # Credentials and config arrive as a base64-encoded JSON blob
        # (-e/--encodedconf); the language-specific credentials are pulled
        # out of a 'kernel_<language>_credentials' key.
        language = args.language
        session = args.session
        conf_json = json.loads(base64.b64decode(args.encodedconf).decode('utf-8'))
        lang_args = conf_json['kernel_{}_credentials'.format(language)]
        url = lang_args['url']
        auth = lang_args['auth']
        username = lang_args['username']
        password = lang_args['base64_password']
        conf.override_all(conf_json)
        properties = conf.get_session_properties(language)
        if url is not None:
            endpoint = Endpoint(url, auth, username, password)
            info_sessions = self.spark_controller.get_all_sessions_endpoint_info(endpoint)
            # Reuse an existing session at the endpoint if one matches;
            # otherwise create it (skip_if_exists=True).
            if session in info_sessions:
                print("found session")
            else:
                self.spark_controller.add_session(session, endpoint, True, properties)
            coerce = get_coerce_value(args.coerce)
            if args.context == CONTEXT_NAME_SPARK:
                return self.execute_spark(cell, args.output, args.samplemethod,
                                          args.maxrows, args.samplefraction,
                                          session, coerce)
            elif args.context == CONTEXT_NAME_SQL:
                return self.execute_sqlquery(cell, args.samplemethod, args.maxrows,
                                             args.samplefraction, session,
                                             args.output, args.quiet, coerce)
            else:
                self.ipython_display.send_error("Context '{}' not found".format(args.context))
        #check if session already exists
    # add
    elif subcommand == "add":
        if args.url is None:
            self.ipython_display.send_error("Need to supply URL argument (e.g. "
                                            "-u https://example.com/livyendpoint)")
            return
        name = args.session
        language = args.language
        endpoint = Endpoint(args.url, args.auth, args.user, args.password)
        skip = args.skip
        properties = conf.get_session_properties(language)
        self.spark_controller.add_session(name, endpoint, skip, properties)
    # delete
    elif subcommand == "delete":
        if args.session is not None:
            self.spark_controller.delete_session_by_name(args.session)
        elif args.url is not None:
            if args.id is None:
                self.ipython_display.send_error("Must provide --id or -i option to delete session at endpoint from URL")
                return
            endpoint = Endpoint(args.url, args.auth, args.user, args.password)
            session_id = args.id
            self.spark_controller.delete_session_by_id(endpoint, session_id)
        else:
            self.ipython_display.send_error("Subcommand 'delete' requires a session name or a URL and session ID")
    # cleanup
    elif subcommand == "cleanup":
        if args.url is not None:
            endpoint = Endpoint(args.url, args.auth, args.user, args.password)
            self.spark_controller.cleanup_endpoint(endpoint)
        else:
            self.spark_controller.cleanup()
    # logs
    elif subcommand == "logs":
        self.ipython_display.write(self.spark_controller.get_logs(args.session))
    # run
    elif len(subcommand) == 0:
        # No subcommand: execute the cell in the named session, in the
        # spark or sql context.
        coerce = get_coerce_value(args.coerce)
        if args.context == CONTEXT_NAME_SPARK:
            return self.execute_spark(cell, args.output, args.samplemethod,
                                      args.maxrows, args.samplefraction,
                                      args.session, coerce)
        elif args.context == CONTEXT_NAME_SQL:
            return self.execute_sqlquery(cell, args.samplemethod, args.maxrows,
                                         args.samplefraction, args.session,
                                         args.output, args.quiet, coerce)
        else:
            self.ipython_display.send_error("Context '{}' not found".format(args.context))
    # error
    else:
        self.ipython_display.send_error("Subcommand '{}' not found. {}".format(subcommand, usage))
def spark(self, line, cell="", local_ns=None):
    """Magic to execute spark remotely.

    This magic allows you to create a Livy Scala or Python session against
    a Livy endpoint. Every session can be used to execute either Spark code
    or SparkSQL code by executing against the SQL context in the session.
    When the SQL context is used, the result will be a Pandas dataframe of a
    sample of the results.

    If invoked with no subcommand, the cell will be executed against the
    specified session.

    Subcommands
    -----------
    info
        Display the available Livy sessions and other configurations for
        sessions.
    add
        Add a Livy session given a session name (-s), language (-l), and
        endpoint credentials. The -k argument, if present, will skip adding
        this session if it already exists.
        e.g. `%spark add -s test -l python -u https://sparkcluster.net/livy -a u -p -k`
    config
        Override the livy session properties sent to Livy on session creation.
        All session creations will contain these config settings from then on.
        Expected value is a JSON key-value string to be sent as part of the
        Request Body for the POST /sessions endpoint in Livy.
        e.g. `%%spark config`
        `{"driverMemory":"1000M", "executorCores":4}`
    run
        Run Spark code against a session.
        e.g. `%%spark -s testsession` will execute the cell code against the
        testsession previously created
        e.g. `%%spark -s testsession -c sql` will execute the SQL code against
        the testsession previously created
        e.g. `%%spark -s testsession -c sql -o my_var` will execute the SQL
        code against the testsession previously created and store the pandas
        dataframe created in the my_var variable in the Python environment.
    logs
        Returns the logs for a given session.
        e.g. `%spark logs -s testsession` will return the logs for the
        testsession previously created
    delete
        Delete a Livy session.
        e.g. `%spark delete -s defaultlivy`
    cleanup
        Delete all Livy sessions created by the notebook. No arguments
        required.
        e.g. `%spark cleanup`
    """
    usage = "Please look at usage of %spark by executing `%spark?`."
    user_input = line
    args = parse_argstring_or_throw(self.spark, user_input)
    # First positional token selects the subcommand; empty means "run".
    subcommand = args.command[0].lower()
    # info
    if subcommand == "info":
        if args.url is not None:
            endpoint = Endpoint(args.url, args.user, args.password)
            info_sessions = self.spark_controller.get_all_sessions_endpoint_info(endpoint)
            self._print_endpoint_info(info_sessions)
        else:
            self._print_local_info()
    # config
    elif subcommand == "config":
        # Cell body is a JSON object merged into future session creations.
        conf.override(conf.session_configs.__name__, json.loads(cell))
    # add
    elif subcommand == "add":
        if args.url is None:
            self.ipython_display.send_error("Need to supply URL argument (e.g. -u https://example.com/livyendpoint)")
            return
        name = args.session
        language = args.language
        endpoint = Endpoint(args.url, args.user, args.password)
        skip = args.skip
        properties = conf.get_session_properties(language)
        self.spark_controller.add_session(name, endpoint, skip, properties)
    # delete
    elif subcommand == "delete":
        if args.session is not None:
            self.spark_controller.delete_session_by_name(args.session)
        elif args.url is not None:
            if args.id is None:
                self.ipython_display.send_error("Must provide --id or -i option to delete session at endpoint from URL")
                return
            endpoint = Endpoint(args.url, args.user, args.password)
            session_id = args.id
            self.spark_controller.delete_session_by_id(endpoint, session_id)
        else:
            self.ipython_display.send_error("Subcommand 'delete' requires a session name or a URL and session ID")
    # cleanup
    elif subcommand == "cleanup":
        if args.url is not None:
            endpoint = Endpoint(args.url, args.user, args.password)
            self.spark_controller.cleanup_endpoint(endpoint)
        else:
            self.spark_controller.cleanup()
    # logs
    elif subcommand == "logs":
        self.ipython_display.write(self.spark_controller.get_logs(args.session))
    # run
    elif len(subcommand) == 0:
        # No subcommand: execute the cell in the named session.
        if args.context == CONTEXT_NAME_SPARK:
            (success, out) = self.spark_controller.run_command(Command(cell), args.session)
            if success:
                self.ipython_display.write(out)
            else:
                self.ipython_display.send_error(out)
        elif args.context == CONTEXT_NAME_SQL:
            return self.execute_sqlquery(cell, args.samplemethod, args.maxrows,
                                         args.samplefraction, args.session,
                                         args.output, args.quiet)
        else:
            self.ipython_display.send_error("Context '{}' not found".format(args.context))
    # error
    else:
        self.ipython_display.send_error("Subcommand '{}' not found. {}".format(subcommand, usage))
def help(self, line, cell="", local_ns=None):
    """Render an HTML table documenting every kernel magic and its parameters."""
    parse_argstring_or_throw(self.help, line)
    self._assure_cell_body_is_empty(KernelMagics.help.__name__, cell)
    # User-facing HTML: do not edit the text below without updating the
    # corresponding magics.
    help_html = u"""
<table>
  <tr>
    <th>Magic</th>
    <th>Example</th>
    <th>Explanation</th>
  </tr>
  <tr>
    <td>info</td>
    <td>%%info</td>
    <td>Outputs session information for the current Livy endpoint.</td>
  </tr>
  <tr>
    <td>cleanup</td>
    <td>%%cleanup -f</td>
    <td>Deletes all sessions for the current Livy endpoint, including this notebook's session. The force flag is mandatory.</td>
  </tr>
  <tr>
    <td>delete</td>
    <td>%%delete -f -s 0</td>
    <td>Deletes a session by number for the current Livy endpoint. Cannot delete this kernel's session.</td>
  </tr>
  <tr>
    <td>logs</td>
    <td>%%logs</td>
    <td>Outputs the current session's Livy logs.</td>
  </tr>
  <tr>
    <td>configure</td>
    <td>%%configure -f<br/>{"executorMemory": "1000M", "executorCores": 4}</td>
    <td>Configure the session creation parameters. The force flag is mandatory if a session has already been created
    and the session will be dropped and recreated.<br/>Look at <a href="https://github.com/cloudera/livy#request-body">
    Livy's POST /sessions Request Body</a> for a list of valid parameters. Parameters must be passed in as a JSON string.</td>
  </tr>
  <tr>
    <td>spark</td>
    <td>%%spark -o df<br/>df = spark.read.parquet('...</td>
    <td>Executes spark commands.
    Parameters:
      <ul>
        <li>-o VAR_NAME: The Spark dataframe of name VAR_NAME will be available in the %%local Python context as a
          <a href="http://pandas.pydata.org/">Pandas</a> dataframe with the same name.</li>
        <li>-m METHOD: Sample method, either <tt>take</tt> or <tt>sample</tt>.</li>
        <li>-n MAXROWS: The maximum number of rows of a dataframe that will be pulled from Livy to Jupyter.
          If this number is negative, then the number of rows will be unlimited.</li>
        <li>-r FRACTION: Fraction used for sampling.</li>
      </ul>
    </td>
  </tr>
  <tr>
    <td>sql</td>
    <td>%%sql -o tables -q<br/>SHOW TABLES</td>
    <td>Executes a SQL query against the variable sqlContext (Spark v1.x) or spark (Spark v2.x).
    Parameters:
      <ul>
        <li>-o VAR_NAME: The result of the SQL query will be available in the %%local Python context as a
          <a href="http://pandas.pydata.org/">Pandas</a> dataframe.</li>
        <li>-q: The magic will return None instead of the dataframe (no visualization).</li>
        <li>-m, -n, -r are the same as the %%spark parameters above.</li>
      </ul>
    </td>
  </tr>
  <tr>
    <td>local</td>
    <td>%%local<br/>a = 1</td>
    <td>All the code in subsequent lines will be executed locally. Code must be valid Python code.</td>
  </tr>
  <tr>
    <td>send_to_spark</td>
    <td>%%send_to_spark -i variable -t str -n var</td>
    <td>Sends a variable from local output to spark cluster.
    <br/>
    Parameters:
      <ul>
        <li>-i VAR_NAME: Local Pandas DataFrame(or String) of name VAR_NAME will be available in the %%spark context as a
          Spark dataframe(or String) with the same name.</li>
        <li>-t TYPE: Specifies the type of variable passed as -i. Available options are:
          `str` for string and `df` for Pandas DataFrame. Optional, defaults to `str`.</li>
        <li>-n NAME: Custom name of variable passed as -i. Optional, defaults to -i variable name.</li>
        <li>-m MAXROWS: Maximum amount of Pandas rows that will be sent to Spark. Defaults to 2500.</li>
      </ul>
    </td>
  </tr>
</table>
"""
    self.ipython_display.html(help_html)
def spark(self, line, cell="", local_ns=None):
    """Magic to execute spark remotely.

    This magic allows you to create a Livy Scala or Python session against
    a Livy endpoint. Every session can be used to execute either Spark code
    or SparkSQL code by executing against the SQL context in the session.
    When the SQL context is used, the result will be a Pandas dataframe of a
    sample of the results.

    If invoked with no subcommand, the cell will be executed against the
    specified session.

    Subcommands
    -----------
    info
        Display the available Livy sessions and other configurations for
        sessions with None, Basic, or Kerberos auth.
    add
        Add a Livy session given a session name (-s), language (-l), and
        endpoint credentials. The -k argument, if present, will skip adding
        this session if it already exists.
        e.g. `%spark add -s test -l python -u https://sparkcluster.net/livy -t Kerberos -a u -p -k`
    config
        Override the livy session properties sent to Livy on session
        creation. All session creations will contain these config settings
        from then on. Expected value is a JSON key-value string to be sent
        as part of the Request Body for the POST /sessions endpoint in Livy.
        e.g. `%%spark config`
             `{"driverMemory":"1000M", "executorCores":4}`
    run
        Run Spark code against a session.
        e.g. `%%spark -s testsession` will execute the cell code against
        the testsession previously created
        e.g. `%%spark -s testsession -c sql` will execute the SQL code
        against the testsession previously created
        e.g. `%%spark -s testsession -c sql -o my_var` will execute the SQL
        code against the testsession previously created and store the
        pandas dataframe created in the my_var variable in the Python
        environment.
    logs
        Returns the logs for a given session.
        e.g. `%spark logs -s testsession` will return the logs for the
        testsession previously created
    delete
        Delete a Livy session.
        e.g. `%spark delete -s defaultlivy`
    cleanup
        Delete all Livy sessions created by the notebook. No arguments
        required.
        e.g. `%spark cleanup`
    """
    args = parse_argstring_or_throw(self.spark, line)
    subcommand = args.command[0].lower()

    # Only Google-auth `add` is handled here; all other commands (except
    # `info`) are delegated to the wrapped RemoteSparkMagics instance below.
    if subcommand == "add" and args.auth == "Google":
        if args.url is None:
            # A target URL is mandatory for `add`; report and abort.
            self.ipython_display.send_error(
                "Need to supply URL argument (e.g. -u https://example.com/livyendpoint)"
            )
            return
        endpoint = Endpoint(args.url, initialize_auth(args))
        self.endpoints[args.url] = endpoint
        # Persist each endpoint's writeable attributes (as plain dicts) so
        # they can be auto-restored in a later notebook session.
        stored_endpoints = [
            SerializableEndpoint(endpoint).__dict__
            for endpoint in self.endpoints.values()
        ]
        self.db['autorestore/stored_endpoints'] = stored_endpoints
        properties = conf.get_session_properties(args.language)
        self.spark_controller.add_session(
            args.session, endpoint, args.skip, properties
        )
        # The Livy server does not store session names, so keep an
        # id -> name mapping locally to restore names across notebook
        # sessions.
        session_id_to_name = get_session_id_to_name(self.db, self.ipython_display)
        new_session = self.spark_controller.session_manager.get_session(args.session)
        session_id_to_name[new_session.id] = args.session
        self.db['autorestore/session_id_to_name'] = session_id_to_name
    elif subcommand == "info":
        if args.url is not None and args.id is not None:
            # Query a specific endpoint for its sessions.
            endpoint = Endpoint(args.url, initialize_auth(args))
            sessions = self.spark_controller.get_all_sessions_endpoint_info(endpoint)
            self._print_endpoint_info(sessions, args.id)
        else:
            self._print_local_info()
    else:
        # Fall through to the wrapped RemoteSparkMagics implementation.
        self.__remotesparkmagics.spark(line, cell, local_ns=None)
def help(self, line, cell="", local_ns=None):
    """Display an HTML table of the available magics, each with an
    example and an explanation of what it does."""
    # No flags are accepted on the %%help line; reject any that were passed.
    parse_argstring_or_throw(self.help, line)
    # Reject a non-empty cell body for %%help.
    self._assure_cell_body_is_empty(KernelMagics.help.__name__, cell)
    # The table markup is user-visible output and is kept verbatim.
    table_html = u""" <table> <tr> <th>Magic</th> <th>Example</th> <th>Explanation</th> </tr> <tr> <td>info</td> <td>%%info</td> <td>Outputs session information for the current Livy endpoint.</td> </tr> <tr> <td>cleanup</td> <td>%%cleanup -f</td> <td>Deletes all sessions for the current Livy endpoint, including this notebook's session. The force flag is mandatory.</td> </tr> <tr> <td>delete</td> <td>%%delete -f -s 0</td> <td>Deletes a session by number for the current Livy endpoint. Cannot delete this kernel's session.</td> </tr> <tr> <td>logs</td> <td>%%logs</td> <td>Outputs the current session's Livy logs.</td> </tr> <tr> <td>configure</td> <td>%%configure -f<br/>{"executorMemory": "1000M", "executorCores": 4}</td> <td>Configure the session creation parameters. The force flag is mandatory if a session has already been created and the session will be dropped and recreated.<br/>Look at <a href="https://github.com/cloudera/livy#request-body"> Livy's POST /sessions Request Body</a> for a list of valid parameters. Parameters must be passed in as a JSON string.</td> </tr> <tr> <td>sql</td> <td>%%sql -o tables -q<br/>SHOW TABLES</td> <td>Executes a SQL query against the sqlContext. Parameters: <ul> <li>-o VAR_NAME: The result of the query will be available in the %%local Python context as a <a href="http://pandas.pydata.org/">Pandas</a> dataframe.</li> <li>-q: The magic will return None instead of the dataframe (no visualization).</li> <li>-m METHOD: Sample method, either <tt>take</tt> or <tt>sample</tt>.</li> <li>-n MAXROWS: The maximum number of rows of a SQL query that will be pulled from Livy to Jupyter. 
If this number is negative, then the number of rows will be unlimited.</li> <li>-r FRACTION: Fraction used for sampling.</li> </ul> </td> </tr> <tr> <td>local</td> <td>%%local<br/>a = 1</td> <td>All the code in subsequent lines will be executed locally. Code must be valid Python code.</td> </tr> </table> """
    self.ipython_display.html(table_html)
def help(self, line, cell="", local_ns=None):
    """Show an HTML table describing the supported magics (example plus
    explanation for each).

    NOTE(review): this definition appears byte-identical to another
    `help` definition earlier in this file — confirm whether both copies
    are intentional.
    """
    # %%help declares no arguments; this raises a usage error otherwise.
    parse_argstring_or_throw(self.help, line)
    # The cell body must be empty for %%help.
    self._assure_cell_body_is_empty(KernelMagics.help.__name__, cell)
    # User-facing markup reproduced exactly as in the original.
    rendered = u""" <table> <tr> <th>Magic</th> <th>Example</th> <th>Explanation</th> </tr> <tr> <td>info</td> <td>%%info</td> <td>Outputs session information for the current Livy endpoint.</td> </tr> <tr> <td>cleanup</td> <td>%%cleanup -f</td> <td>Deletes all sessions for the current Livy endpoint, including this notebook's session. The force flag is mandatory.</td> </tr> <tr> <td>delete</td> <td>%%delete -f -s 0</td> <td>Deletes a session by number for the current Livy endpoint. Cannot delete this kernel's session.</td> </tr> <tr> <td>logs</td> <td>%%logs</td> <td>Outputs the current session's Livy logs.</td> </tr> <tr> <td>configure</td> <td>%%configure -f<br/>{"executorMemory": "1000M", "executorCores": 4}</td> <td>Configure the session creation parameters. The force flag is mandatory if a session has already been created and the session will be dropped and recreated.<br/>Look at <a href="https://github.com/cloudera/livy#request-body"> Livy's POST /sessions Request Body</a> for a list of valid parameters. Parameters must be passed in as a JSON string.</td> </tr> <tr> <td>sql</td> <td>%%sql -o tables -q<br/>SHOW TABLES</td> <td>Executes a SQL query against the sqlContext. Parameters: <ul> <li>-o VAR_NAME: The result of the query will be available in the %%local Python context as a <a href="http://pandas.pydata.org/">Pandas</a> dataframe.</li> <li>-q: The magic will return None instead of the dataframe (no visualization).</li> <li>-m METHOD: Sample method, either <tt>take</tt> or <tt>sample</tt>.</li> <li>-n MAXROWS: The maximum number of rows of a SQL query that will be pulled from Livy to Jupyter. 
If this number is negative, then the number of rows will be unlimited.</li> <li>-r FRACTION: Fraction used for sampling.</li> </ul> </td> </tr> <tr> <td>local</td> <td>%%local<br/>a = 1</td> <td>All the code in subsequent lines will be executed locally. Code must be valid Python code.</td> </tr> </table> """
    self.ipython_display.html(rendered)