def test_add_sessions_command_parses():
    """`add` should forward the parsed endpoint and session config to the controller."""
    # Case 1: basic auth, python language, skip flag absent.
    mock_add = MagicMock()
    spark_controller.add_session = mock_add
    basic_line = " ".join([
        "add", "-s name", "-l python",
        "-u http://url.com -t {} -a sdf -p w".format(AUTH_BASIC)])
    magic.spark(basic_line)
    parsed = parse_argstring_or_throw(RemoteSparkMagics.spark, basic_line)
    mock_add.assert_called_once_with(
        "name",
        Endpoint("http://url.com", initialize_auth(parsed)),
        False,
        {"kind": "pyspark"})
    # Case 2: skip flag present, scala language, long-form --url, no auth args.
    mock_add = MagicMock()
    spark_controller.add_session = mock_add
    skip_line = " ".join([
        "add", "-s name", "-l scala", "--url http://location:port", "-k"])
    magic.spark(skip_line)
    parsed = parse_argstring_or_throw(RemoteSparkMagics.spark, skip_line)
    parsed.auth = NO_AUTH
    mock_add.assert_called_once_with(
        "name",
        Endpoint("http://location:port", initialize_auth(parsed)),
        True,
        {"kind": "spark"})
def test_add_sessions_command_raises_google_no_account():
    """Exercise Google auth initialization when no -g account flag is supplied.

    NOTE(review): the name suggests an exception is expected — presumably a
    `@raises`-style decorator lives outside this view; verify against the file.
    """
    with patch('google.auth.default', return_value=(creds, 'project'),
               autospec=True):
        mock_add = MagicMock()
        spark_controller.add_session = mock_add
        google_line = " ".join([
            "add", "-s name", "-l python", "-u http://url.com -t Google"])
        magic.spark(google_line)
        parsed = parse_argstring_or_throw(DataprocMagics.spark, google_line)
        initialize_auth(parsed)
def test_cleanup_endpoint_command_parses():
    """`cleanup -u` should call cleanup_endpoint with the endpoint built from args."""
    cleanup_mock = MagicMock()
    spark_controller.cleanup_endpoint = cleanup_mock
    # No-auth form.
    no_auth_line = "cleanup -u endp"
    magic.spark(no_auth_line)
    parsed = parse_argstring_or_throw(RemoteSparkMagics.spark, no_auth_line)
    parsed.auth = NO_AUTH
    cleanup_mock.assert_called_once_with(
        Endpoint("endp", initialize_auth(parsed)))
    # Basic-auth form.
    basic_line = "cleanup -u endp -a user -p passw -t {}".format(AUTH_BASIC)
    magic.spark(basic_line)
    parsed = parse_argstring_or_throw(RemoteSparkMagics.spark, basic_line)
    cleanup_mock.assert_called_with(
        Endpoint("endp", initialize_auth(parsed)))
def _do_not_call_change_endpoint(self, line, cell="", local_ns=None):
    """Re-point this magic at a new Livy endpoint parsed from `line`.

    Raises BadUserDataException when a session is already running, since the
    endpoint cannot be switched mid-session.
    """
    parsed = parse_argstring_or_throw(self._do_not_call_change_endpoint, line)
    if self.session_started:
        raise BadUserDataException(
            u"Cannot change the endpoint if a session has been started.")
    auth_instance = initialize_auth(args=parsed)
    self.endpoint = Endpoint(parsed.url, auth_instance)
def test_add_sessions_command_parses_kerberos():
    """`add` with Kerberos auth should build the endpoint and record its URL."""
    mock_add = MagicMock()
    spark_controller.add_session = mock_add
    kerberos_line = " ".join([
        "add", "-s name", "-l python",
        "-u http://url.com -t {}".format('Kerberos')])
    magic.spark(kerberos_line)
    parsed = parse_argstring_or_throw(RemoteSparkMagics.spark, kerberos_line)
    auth_instance = initialize_auth(parsed)
    mock_add.assert_called_once_with(
        "name",
        Endpoint("http://url.com", initialize_auth(parsed)),
        False,
        {"kind": "pyspark"})
    assert_equals(auth_instance.url, "http://url.com")
def refresh_configuration(self):
    """Rebuild self.endpoint from the configured base64 kernel credentials."""
    # Config accessor name depends on this kernel's language, e.g.
    # base64_kernel_python_credentials.
    credentials = getattr(
        conf, 'base64_kernel_' + self.language + '_credentials')()
    args = Namespace(auth=credentials['auth'],
                     user=credentials['username'],
                     password=credentials['password'],
                     url=credentials['url'])
    self.endpoint = Endpoint(credentials['url'], initialize_auth(args))
def test_add_sessions_command_parses_google_default_credentials():
    """`add -t Google -g default-credentials` builds a GoogleAuth endpoint."""
    with patch('google.auth.default', return_value=(creds, 'project'),
               autospec=True):
        add_sessions_mock = MagicMock()
        spark_controller.add_session = add_sessions_mock
        command = "add"
        name = "-s name"
        language = "-l python"
        account = "-g default-credentials"
        connection_string = "-u http://url.com -t Google"
        line = " ".join([command, name, language, connection_string, account])
        magic.spark(line)
        args = parse_argstring_or_throw(DataprocMagics.spark, line)
        auth_instance = initialize_auth(args)
        add_sessions_mock.assert_called_once_with(
            "name",
            Endpoint("http://url.com", initialize_auth(args)),
            False,
            {"kind": "pyspark"})
        assert_equals(auth_instance.url, "http://url.com")
        # BUG FIX: the bare `isinstance(...)` call discarded its result and
        # asserted nothing; make it a real assertion.
        assert isinstance(auth_instance, GoogleAuth)
        assert_equals(auth_instance.active_credentials, 'default-credentials')
def test_add_sessions_command_parses_google_user_credentials():
    """`add -t Google -g <account>` builds a GoogleAuth endpoint for that user."""
    with patch('sparkmagic.auth.google.list_credentialed_user_accounts',
               return_value=mock_credentialed_accounts_valid_accounts), \
         patch('subprocess.check_output',
               return_value=AUTH_DESCRIBE_USER):
        add_sessions_mock = MagicMock()
        spark_controller.add_session = add_sessions_mock
        command = "add"
        name = "-s name"
        language = "-l python"
        account = "-g [email protected]"
        connection_string = "-u http://url.com -t Google"
        line = " ".join([command, name, language, connection_string, account])
        magic.spark(line)
        args = parse_argstring_or_throw(DataprocMagics.spark, line)
        auth_instance = initialize_auth(args)
        add_sessions_mock.assert_called_once_with(
            "name",
            Endpoint("http://url.com", initialize_auth(args)),
            False,
            {"kind": "pyspark"})
        assert_equals(auth_instance.url, "http://url.com")
        # BUG FIX: the bare `isinstance(...)` call discarded its result and
        # asserted nothing; make it a real assertion.
        assert isinstance(auth_instance, GoogleAuth)
        assert_equals(auth_instance.active_credentials, '*****@*****.**')
def test_change_endpoint():
    """_do_not_call_change_endpoint should install an Endpoint matching the args."""
    server, user, password = 'server', 'user', 'password'
    auth_type = constants.AUTH_BASIC
    line = "-s {} -u {} -p {} -t {}".format(server, user, password, auth_type)
    magic._do_not_call_change_endpoint(line)
    expected_args = Namespace(auth='Basic_Access', password='******',
                              url='server', user='******')
    auth_instance = initialize_auth(expected_args)
    expected_endpoint = Endpoint(server, auth_instance)
    assert_equals(expected_endpoint.url, magic.endpoint.url)
    assert_equals(Endpoint(server, auth_instance), magic.endpoint)
def test_delete_sessions_command_parses():
    """`delete` dispatches by name (-s) or by endpoint URL + session id (-i)."""
    # Delete by session name.
    by_name_mock = MagicMock()
    spark_controller.delete_session_by_name = by_name_mock
    magic.spark("delete -s name")
    by_name_mock.assert_called_once_with("name")
    # Delete by endpoint URL and numeric session id.
    by_id_line = "delete -u URL -t {} -a username -p password -i 4".format(
        AUTH_BASIC)
    by_id_mock = MagicMock()
    spark_controller.delete_session_by_id = by_id_mock
    magic.spark(by_id_line)
    parsed = parse_argstring_or_throw(RemoteSparkMagics.spark, by_id_line)
    by_id_mock.assert_called_once_with(
        Endpoint("URL", initialize_auth(parsed)), 4)
def _get_default_endpoints():
    """Return the set of Endpoints built from each supported kernel's config.

    Only kernel configs that supply all of url/password/username with a
    non-empty url are turned into endpoints; those endpoints are flagged as
    implicitly added (not user-created).

    Returns:
        set: Endpoint objects, one per sufficiently configured kernel.
    """
    default_endpoints = set()
    for kernel_type in LANGS_SUPPORTED:
        endpoint_config = getattr(
            conf, 'kernel_%s_credentials' % kernel_type)()
        # IDIOM: generator inside all() instead of a throwaway list (C419).
        has_credentials = all(
            p in endpoint_config for p in ("url", "password", "username"))
        if has_credentials and endpoint_config["url"] != "":
            args = Namespace(
                user=endpoint_config["username"],
                password=endpoint_config["password"],
                auth=endpoint_config.get("auth", None),
                url=endpoint_config.get("url", None))
            auth_instance = initialize_auth(args)
            default_endpoints.add(Endpoint(
                auth=auth_instance,
                url=endpoint_config["url"],
                implicitly_added=True))
    return default_endpoints
def test_add_sessions_command_exception():
    """A BadUserDataException from add_session should surface via send_error."""
    failing_add = MagicMock(side_effect=BadUserDataException('hehe'))
    spark_controller.add_session = failing_add
    line = " ".join([
        "add", "-s name", "-l python",
        "-u http://url.com -t {} -a sdf -p w".format(AUTH_BASIC)])
    magic.spark(line)
    parsed = parse_argstring_or_throw(RemoteSparkMagics.spark, line)
    failing_add.assert_called_once_with(
        "name",
        Endpoint("http://url.com", initialize_auth(parsed)),
        False,
        {"kind": "pyspark"})
    ipython_display.send_error.assert_called_once_with(
        EXPECTED_ERROR_MSG.format(failing_add.side_effect))
def _restore_endpoints_and_sessions(db, ipython_display, spark_controller,
                                    endpoints):
    """Loads all of the running livy sessions of an endpoint.

    Rebuilds each endpoint persisted in the ipython database, then re-attaches
    any still-running livy sessions to the session manager by their stored
    names. If authentication fails, the persisted state is wiped.

    Args:
        db (dict): the ipython database where stored_endpoints list will be stored
        ipython_display (hdijupyterutils.ipythondisplay.IpythonDisplay): the display
            that informs the user of any errors that occur while restoring endpoints
        spark_controller (sparkmagic.livyclientlib.sparkcontroller.SparkController):
            an object that manages all the spark sessions
        endpoints (dict): the endpoints dict that restored endpoints will be added to.
    """
    stored_endpoints = get_stored_endpoints(db, ipython_display)
    try:
        for serialized_endpoint in stored_endpoints:
            # NOTE(review): restored endpoints are always given auth='Google';
            # presumably only Google-auth endpoints are serialized — confirm
            # against the code that writes 'stored_endpoints'.
            args = Namespace(auth='Google',
                             url=serialized_endpoint.get('url'),
                             account=serialized_endpoint.get('account'))
            auth = initialize_auth(args)
            endpoint = Endpoint(url=serialized_endpoint.get('url'), auth=auth)
            endpoints[endpoint.url] = endpoint
    # If a user revokes the credentials used for stored endpoints and sessions,
    # all of the stored endpoints and sessions are cleared.
    except BadUserConfigurationException as caught_exc:
        db['autorestore/' + 'stored_endpoints'] = list()
        db['autorestore/' + 'session_id_to_name'] = dict()
        ipython_display.send_error("Failed to restore endpoints and sessions "
                                   f"due to an authentication error: {str(caught_exc)}. "
                                   "Cleared stored_endpoints and session_id_to_name.")
    for endpoint in endpoints.values():
        session_id_to_name = get_session_id_to_name(db, ipython_display)
        # get all sessions running on that endpoint
        endpoint_sessions = spark_controller.get_all_sessions_endpoint(
            endpoint)
        # add each session to session manager, but only under a known name and
        # only if that name is not already managed.
        for session in endpoint_sessions:
            name = session_id_to_name.get(session.id)
            if name is not None and name not in spark_controller.get_managed_clients(
            ):
                spark_controller.session_manager.add_session(name, session)
def test_add_sessions_command_extra_properties():
    """`config` overrides should be merged into the properties passed to add_session."""
    conf.override_all({})
    magic.spark("config", "{\"extra\": \"yes\"}")
    assert conf.session_configs() == {"extra": "yes"}
    mock_add = MagicMock()
    spark_controller.add_session = mock_add
    add_line = " ".join([
        "add", "-s name", "-l scala", "-u http://livyendpoint.com"])
    magic.spark(add_line)
    parsed = parse_argstring_or_throw(RemoteSparkMagics.spark, add_line)
    parsed.auth = NO_AUTH
    mock_add.assert_called_once_with(
        "name",
        Endpoint("http://livyendpoint.com", initialize_auth(parsed)),
        False,
        {"kind": "spark", "extra": "yes"})
    # Leave global config clean for other tests.
    conf.override_all({})
def spark(self, line, cell="", local_ns=None):
    """Magic to execute spark remotely.

    This magic allows you to create a Livy Scala or Python session against
    a Livy endpoint. Every session can be used to execute either Spark code
    or SparkSQL code by executing against the SQL context in the session.
    When the SQL context is used, the result will be a Pandas dataframe of a
    sample of the results.

    If invoked with no subcommand, the cell will be executed against the
    specified session.

    Subcommands
    -----------
    info
        Display the available Livy sessions and other configurations for
        sessions with None, Basic, or Kerberos auth.
    add
        Add a Livy session given a session name (-s), language (-l), and
        endpoint credentials. The -k argument, if present, will skip adding
        this session if it already exists.
        e.g. `%spark add -s test -l python -u https://sparkcluster.net/livy -t Kerberos -a u -p -k`
    config
        Override the livy session properties sent to Livy on session creation.
        All session creations will contain these config settings from then on.
        Expected value is a JSON key-value string to be sent as part of the
        Request Body for the POST /sessions endpoint in Livy.
        e.g. `%%spark config`
        `{"driverMemory":"1000M", "executorCores":4}`
    run
        Run Spark code against a session.
        e.g. `%%spark -s testsession` will execute the cell code against the
        testsession previously created
        e.g. `%%spark -s testsession -c sql` will execute the SQL code against
        the testsession previously created
        e.g. `%%spark -s testsession -c sql -o my_var` will execute the SQL
        code against the testsession previously created and store the pandas
        dataframe created in the my_var variable in the Python environment.
    logs
        Returns the logs for a given session.
        e.g. `%spark logs -s testsession` will return the logs for the
        testsession previously created
    delete
        Delete a Livy session.
        e.g. `%spark delete -s defaultlivy`
    cleanup
        Delete all Livy sessions created by the notebook. No arguments
        required.
        e.g. `%spark cleanup`
    """
    user_input = line
    args = parse_argstring_or_throw(self.spark, user_input)
    subcommand = args.command[0].lower()
    # Google-auth `add` is handled here so the endpoint and session name can
    # be persisted to the ipython db; everything else is delegated below.
    if args.auth == "Google" and subcommand == "add":
        if args.url is None:
            self.ipython_display.send_error(
                "Need to supply URL argument (e.g. -u https://example.com/livyendpoint)"
            )
            return
        name = args.session
        language = args.language
        endpoint = Endpoint(args.url, initialize_auth(args))
        self.endpoints[args.url] = endpoint
        # convert Endpoints in self.endpoints into list of dictionaries, each
        # storing an Endpoints writeable attributes
        stored_endpoints = [
            SerializableEndpoint(endpoint).__dict__
            for endpoint in self.endpoints.values()
        ]
        self.db['autorestore/' + 'stored_endpoints'] = stored_endpoints
        skip = args.skip
        properties = conf.get_session_properties(language)
        self.spark_controller.add_session(name, endpoint, skip, properties)
        # session_id_to_name dict is necessary to restore session name across
        # notebook sessions since the livy server does not store the name.
        session_id_to_name = get_session_id_to_name(self.db,
                                                    self.ipython_display)
        # add session id -> name to session_id_to_name dict
        session_id_to_name[self.spark_controller.session_manager.get_session(
            name).id] = name
        self.db['autorestore/' + 'session_id_to_name'] = session_id_to_name
    elif subcommand == "info":
        # With both a URL and an id, query that endpoint; otherwise print
        # locally known sessions.
        if args.url is not None and args.id is not None:
            endpoint = Endpoint(args.url, initialize_auth(args))
            info_sessions = self.spark_controller.get_all_sessions_endpoint_info(
                endpoint)
            self._print_endpoint_info(info_sessions, args.id)
        else:
            self._print_local_info()
    else:
        # Everything else (config/run/logs/delete/cleanup, non-Google add)
        # is delegated to the wrapped RemoteSparkMagics implementation.
        self.__remotesparkmagics.spark(line, cell, local_ns=None)
def spark(self, line, cell="", local_ns=None):
    """Magic to execute spark remotely.

    This magic allows you to create a Livy Scala or Python session against
    a Livy endpoint. Every session can be used to execute either Spark code
    or SparkSQL code by executing against the SQL context in the session.
    When the SQL context is used, the result will be a Pandas dataframe of a
    sample of the results.

    If invoked with no subcommand, the cell will be executed against the
    specified session.

    Subcommands
    -----------
    info
        Display the available Livy sessions and other configurations for
        sessions.
    add
        Add a Livy session given a session name (-s), language (-l), and
        endpoint credentials. The -k argument, if present, will skip adding
        this session if it already exists.
        e.g. `%spark add -s test -l python -u https://sparkcluster.net/livy -t Kerberos -a u -p -k`
    config
        Override the livy session properties sent to Livy on session creation.
        All session creations will contain these config settings from then on.
        Expected value is a JSON key-value string to be sent as part of the
        Request Body for the POST /sessions endpoint in Livy.
        e.g. `%%spark config`
        `{"driverMemory":"1000M", "executorCores":4}`
    run
        Run Spark code against a session.
        e.g. `%%spark -s testsession` will execute the cell code against the
        testsession previously created
        e.g. `%%spark -s testsession -c sql` will execute the SQL code against
        the testsession previously created
        e.g. `%%spark -s testsession -c sql -o my_var` will execute the SQL
        code against the testsession previously created and store the pandas
        dataframe created in the my_var variable in the Python environment.
    logs
        Returns the logs for a given session.
        e.g. `%spark logs -s testsession` will return the logs for the
        testsession previously created
    delete
        Delete a Livy session.
        e.g. `%spark delete -s defaultlivy`
    cleanup
        Delete all Livy sessions created by the notebook. No arguments
        required.
        e.g. `%spark cleanup`
    """
    usage = "Please look at usage of %spark by executing `%spark?`."
    user_input = line
    args = parse_argstring_or_throw(self.spark, user_input)
    subcommand = args.command[0].lower()
    # Fall back to the configured auth when none was supplied on the line.
    # IDIOM FIX: the original `else: args.auth = args.auth` branch was a
    # no-op and has been removed.
    if args.auth is None:
        args.auth = conf.get_auth_value(args.user, args.password)
    # info
    if subcommand == "info":
        if args.url is not None and args.id is not None:
            endpoint = Endpoint(args.url, initialize_auth(args))
            info_sessions = self.spark_controller.get_all_sessions_endpoint_info(
                endpoint)
            self._print_endpoint_info(info_sessions, args.id)
        else:
            self._print_local_info()
    # config
    elif subcommand == "config":
        conf.override(conf.session_configs.__name__, json.loads(cell))
    # add
    elif subcommand == "add":
        if args.url is None:
            self.ipython_display.send_error(
                "Need to supply URL argument (e.g. -u https://example.com/livyendpoint)")
            return
        name = args.session
        language = args.language
        endpoint = Endpoint(args.url, initialize_auth(args))
        skip = args.skip
        properties = conf.get_session_properties(language)
        self.spark_controller.add_session(name, endpoint, skip, properties)
    # delete
    elif subcommand == "delete":
        if args.session is not None:
            self.spark_controller.delete_session_by_name(args.session)
        elif args.url is not None:
            if args.id is None:
                self.ipython_display.send_error(
                    "Must provide --id or -i option to delete session at endpoint from URL")
                return
            endpoint = Endpoint(args.url, initialize_auth(args))
            session_id = args.id
            self.spark_controller.delete_session_by_id(endpoint, session_id)
        else:
            self.ipython_display.send_error(
                "Subcommand 'delete' requires a session name or a URL and session ID")
    # cleanup
    elif subcommand == "cleanup":
        if args.url is not None:
            endpoint = Endpoint(args.url, initialize_auth(args))
            self.spark_controller.cleanup_endpoint(endpoint)
        else:
            self.spark_controller.cleanup()
    # logs
    elif subcommand == "logs":
        self.ipython_display.write(
            self.spark_controller.get_logs(args.session))
    # run: an empty subcommand means "execute the cell against a session".
    elif len(subcommand) == 0:
        coerce = get_coerce_value(args.coerce)
        if args.context == CONTEXT_NAME_SPARK:
            return self.execute_spark(cell, args.output, args.samplemethod,
                                      args.maxrows, args.samplefraction,
                                      args.session, coerce)
        elif args.context == CONTEXT_NAME_SQL:
            return self.execute_sqlquery(cell, args.samplemethod, args.maxrows,
                                         args.samplefraction, args.session,
                                         args.output, args.quiet, coerce)
        else:
            self.ipython_display.send_error(
                "Context '{}' not found".format(args.context))
    # error
    else:
        self.ipython_display.send_error(
            "Subcommand '{}' not found. {}".format(subcommand, usage))