Exemplo n.º 1
0
def test_execute_failure_get_statement_output_emits_event():
    spark_events = MagicMock()
    kind = SESSION_KIND_SPARK
    http_client = MagicMock()
    conf.override_all({
        "status_sleep_seconds": 0.01,
        "statement_sleep_seconds": 0.01
    })
    session = tls.TestLivySession._create_session(kind=kind, http_client=http_client)
    session.wait_for_idle = MagicMock()
    conf.load()
    session.start(create_sql_context=False)
    session.wait_for_idle = MagicMock()
    command = Command("command", spark_events=spark_events)
    command._get_statement_output = MagicMock(side_effect=AttributeError('OHHHH'))

    try:
        result = command.execute(session)
        assert False
    except AttributeError as e:
        spark_events.emit_statement_execution_start_event._assert_called_once_with(session.guid, session.kind,
                                                                                   session.id, command.guid)
        spark_events.emit_statement_execution_start_event._assert_called_once_with(session.guid, session.kind,
                                                                                   session.id, command.guid,
                                                                                   -1, False, "AttributeError",
                                                                                   "OHHHH")
        assert_equals(e, command._get_statement_output.side_effect)
Exemplo n.º 2
0
def test_execute_failure_post_statement_emits_event():
    spark_events = MagicMock()
    kind = SESSION_KIND_SPARK
    http_client = MagicMock()
    http_client.post_statement.side_effect = KeyError('Something bad happened here')
    conf.override_all({
        "status_sleep_seconds": 0.01,
        "statement_sleep_seconds": 0.01
    })
    session = tls.TestLivySession._create_session(kind=kind, http_client=http_client)
    session.wait_for_idle = MagicMock()
    conf.load()
    session.start(create_sql_context=False)
    session.wait_for_idle = MagicMock()
    command = Command("command", spark_events=spark_events)

    try:
        result = command.execute(session)
        assert False
    except KeyError as e:
        spark_events.emit_statement_execution_start_event._assert_called_once_with(session.guid, session.kind,
                                                                                   session.id, command.guid)
        spark_events.emit_statement_execution_start_event._assert_called_once_with(session.guid, session.kind,
                                                                                   session.id, command.guid,
                                                                                   -1, False, "KeyError",
                                                                                   "Something bad happened here")
        assert_equals(e, http_client.post_statement.side_effect)
Exemplo n.º 3
0
def test_execute():
    spark_events = MagicMock()
    kind = SESSION_KIND_SPARK
    http_client = MagicMock()
    http_client.post_session.return_value = tls.TestLivySession.session_create_json
    http_client.post_statement.return_value = tls.TestLivySession.post_statement_json
    http_client.get_session.return_value = tls.TestLivySession.ready_sessions_json
    http_client.get_statement.return_value = tls.TestLivySession.ready_statement_json
    conf.override_all({
        "status_sleep_seconds": 0.01,
        "statement_sleep_seconds": 0.01
    })
    session = tls.TestLivySession._create_session(kind=kind, http_client=http_client)
    conf.load()
    session.start(create_sql_context=False)
    command = Command("command", spark_events=spark_events)

    result = command.execute(session)

    http_client.post_statement.assert_called_with(0, {"code": command.code})
    http_client.get_statement.assert_called_with(0, 0)
    assert result[0]
    assert_equals(tls.TestLivySession.pi_result, result[1])
    spark_events.emit_statement_execution_start_event._assert_called_once_with(session.guid, session.kind,
                                                                                        session.id, command.guid)
    spark_events.emit_statement_execution_end_event._assert_called_once_with(session.guid, session.kind,
                                                                                      session.id, command.guid,
                                                                                      0, True, "", "")
Exemplo n.º 4
0
def test_execute_failure_post_statement_emits_event():
    spark_events = MagicMock()
    kind = SESSION_KIND_SPARK
    http_client = MagicMock()
    http_client.post_statement.side_effect = KeyError(
        'Something bad happened here')
    conf.override_all({
        "status_sleep_seconds": 0.01,
        "statement_sleep_seconds": 0.01
    })
    session = tls.TestLivySession._create_session(kind=kind,
                                                  http_client=http_client)
    session.wait_for_idle = MagicMock()
    conf.load()
    session.start(create_sql_context=False)
    session.wait_for_idle = MagicMock()
    command = Command("command", spark_events=spark_events)

    try:
        result = command.execute(session)
        assert False
    except KeyError as e:
        spark_events.emit_statement_execution_start_event._assert_called_once_with(
            session.guid, session.kind, session.id, command.guid)
        spark_events.emit_statement_execution_start_event._assert_called_once_with(
            session.guid, session.kind, session.id, command.guid, -1, False,
            "KeyError", "Something bad happened here")
        assert_equals(e, http_client.post_statement.side_effect)
Exemplo n.º 5
0
def test_execute_failure_wait_for_session_emits_event():
    spark_events = MagicMock()
    kind = SESSION_KIND_SPARK
    http_client = MagicMock()
    http_client.post_session.return_value = tls.TestLivySession.session_create_json
    http_client.post_statement.return_value = tls.TestLivySession.post_statement_json
    http_client.get_session.return_value = tls.TestLivySession.ready_sessions_json
    http_client.get_statement.return_value = tls.TestLivySession.ready_statement_json
    conf.override_all({
        "status_sleep_seconds": 0.01,
        "statement_sleep_seconds": 0.01
    })
    session = tls.TestLivySession._create_session(kind=kind, http_client=http_client)
    conf.load()
    session.start(create_sql_context=False)
    session.wait_for_idle = MagicMock(side_effect=ValueError("yo"))
    command = Command("command", spark_events=spark_events)

    try:
        result = command.execute(session)
        assert False
    except ValueError as e:
        spark_events.emit_statement_execution_start_event._assert_called_once_with(session.guid, session.kind,
                                                                                   session.id, command.guid)
        spark_events.emit_statement_execution_start_event._assert_called_once_with(session.guid, session.kind,
                                                                                   session.id, command.guid,
                                                                                   -1, False, "ValueError", "yo")
        assert_equals(e, session.wait_for_idle.side_effect)
Exemplo n.º 6
0
def test_execute():
    spark_events = MagicMock()
    kind = SESSION_KIND_SPARK
    http_client = MagicMock()
    http_client.post_session.return_value = tls.TestLivySession.session_create_json
    http_client.post_statement.return_value = tls.TestLivySession.post_statement_json
    http_client.get_session.return_value = tls.TestLivySession.ready_sessions_json
    http_client.get_statement.return_value = tls.TestLivySession.ready_statement_json
    conf.override_all({
        "status_sleep_seconds": 0.01,
        "statement_sleep_seconds": 0.01
    })
    session = tls.TestLivySession._create_session(kind=kind,
                                                  http_client=http_client)
    conf.load()
    session.start(create_sql_context=False)
    command = Command("command", spark_events=spark_events)

    result = command.execute(session)

    http_client.post_statement.assert_called_with(0, {"code": command.code})
    http_client.get_statement.assert_called_with(0, 0)
    assert result[0]
    assert_equals(tls.TestLivySession.pi_result, result[1])
    spark_events.emit_statement_execution_start_event._assert_called_once_with(
        session.guid, session.kind, session.id, command.guid)
    spark_events.emit_statement_execution_end_event._assert_called_once_with(
        session.guid, session.kind, session.id, command.guid, 0, True, "", "")
Exemplo n.º 7
0
def test_execute_failure_get_statement_output_emits_event():
    spark_events = MagicMock()
    kind = SESSION_KIND_SPARK
    http_client = MagicMock()
    conf.override_all({
        "status_sleep_seconds": 0.01,
        "statement_sleep_seconds": 0.01
    })
    session = tls.TestLivySession._create_session(kind=kind,
                                                  http_client=http_client)
    session.wait_for_idle = MagicMock()
    conf.load()
    session.start(create_sql_context=False)
    session.wait_for_idle = MagicMock()
    command = Command("command", spark_events=spark_events)
    command._get_statement_output = MagicMock(
        side_effect=AttributeError('OHHHH'))

    try:
        result = command.execute(session)
        assert False
    except AttributeError as e:
        spark_events.emit_statement_execution_start_event._assert_called_once_with(
            session.guid, session.kind, session.id, command.guid)
        spark_events.emit_statement_execution_start_event._assert_called_once_with(
            session.guid, session.kind, session.id, command.guid, -1, False,
            "AttributeError", "OHHHH")
        assert_equals(e, command._get_statement_output.side_effect)
Exemplo n.º 8
0
def test_execute_failure_wait_for_session_emits_event():
    spark_events = MagicMock()
    kind = SESSION_KIND_SPARK
    http_client = MagicMock()
    http_client.post_session.return_value = tls.TestLivySession.session_create_json
    http_client.post_statement.return_value = tls.TestLivySession.post_statement_json
    http_client.get_session.return_value = tls.TestLivySession.ready_sessions_json
    http_client.get_statement.return_value = tls.TestLivySession.ready_statement_json
    conf.override_all({
        "status_sleep_seconds": 0.01,
        "statement_sleep_seconds": 0.01
    })
    session = tls.TestLivySession._create_session(kind=kind,
                                                  http_client=http_client)
    conf.load()
    session.start(create_sql_context=False)
    session.wait_for_idle = MagicMock(side_effect=ValueError("yo"))
    command = Command("command", spark_events=spark_events)

    try:
        result = command.execute(session)
        assert False
    except ValueError as e:
        spark_events.emit_statement_execution_start_event._assert_called_once_with(
            session.guid, session.kind, session.id, command.guid)
        spark_events.emit_statement_execution_start_event._assert_called_once_with(
            session.guid, session.kind, session.id, command.guid, -1, False,
            "ValueError", "yo")
        assert_equals(e, session.wait_for_idle.side_effect)
Exemplo n.º 9
0
def test_pyspark_livy_sql_options():
    query = "abc"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    assert_equals(sqlquery._pyspark_command(),
                  Command('for {} in sqlContext.sql("""{}""").toJSON().take(120): print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME)))

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=-1)
    assert_equals(sqlquery._pyspark_command(),
                  Command('for {} in sqlContext.sql("""{}""").toJSON().collect(): print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME)))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.25, maxrows=-1)
    assert_equals(sqlquery._pyspark_command(),
                  Command('for {} in sqlContext.sql("""{}""").toJSON().sample(False, 0.25).collect(): print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME)))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    assert_equals(sqlquery._pyspark_command(),
                  Command('for {} in sqlContext.sql("""{}""").toJSON().sample(False, 0.33).take(3234): print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME)))
Exemplo n.º 10
0
def test_scala_livy_sql_options():
    query = "abc"

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=100)
    assert_equals(sqlquery._scala_command(),
                  Command('sqlContext.sql("""{}""").toJSON.take(100).foreach(println)'.format(query)))

    sqlquery = SQLQuery(query, samplemethod='take', maxrows=-1)
    assert_equals(sqlquery._scala_command(),
                  Command('sqlContext.sql("""{}""").toJSON.collect.foreach(println)'.format(query)))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.25, maxrows=-1)
    assert_equals(sqlquery._scala_command(),
                  Command('sqlContext.sql("""{}""").toJSON.sample(false, 0.25).collect.foreach(println)'.format(query)))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    assert_equals(sqlquery._scala_command(),
                  Command('sqlContext.sql("""{}""").toJSON.sample(false, 0.33).take(3234).foreach(println)'.format(query)))
Exemplo n.º 11
0
 def spark(self, line, cell="", local_ns=None):
     parse_argstring_or_throw(self.spark, line)
     if self._do_not_call_start_session(""):
         (success, out) = self.spark_controller.run_command(Command(cell))
         if success:
             self.ipython_display.write(out)
         else:
             self.ipython_display.send_error(out)
     else:
         return None
Exemplo n.º 12
0
def test_spark_unexpected_exception():
    line = ""
    cell = "some spark code"
    spark_controller.run_command = MagicMock(side_effect=Exception('oups'))

    magic.spark(line, cell)
    spark_controller.run_command.assert_called_once_with(Command(cell))
    ipython_display.send_error.assert_called_once_with(
        constants.INTERNAL_ERROR_MSG.format(
            spark_controller.run_command.side_effect))
Exemplo n.º 13
0
def test_spark_error():
    line = ""
    cell = "some spark code"
    spark_controller.run_command = MagicMock(return_value=(False, line))

    magic.spark(line, cell)

    ipython_display.send_error.assert_called_once_with(line)
    spark_controller.add_session.assert_called_once_with(
        magic.session_name, magic.endpoint, False,
        {"kind": constants.SESSION_KIND_PYSPARK})
    spark_controller.run_command.assert_called_once_with(Command(cell))
Exemplo n.º 14
0
def test_run_cell_command_exception():
    run_cell_method = MagicMock()
    run_cell_method.side_effect = HttpClientException('meh')
    spark_controller.run_command = run_cell_method

    command = "-s"
    name = "sessions_name"
    line = " ".join([command, name])
    cell = "cell code"

    result = magic.spark(line, cell)

    run_cell_method.assert_called_once_with(Command(cell), name)
    assert result is None
    ipython_display.send_error.assert_called_once_with(
        EXPECTED_ERROR_MSG.format(run_cell_method.side_effect))
Exemplo n.º 15
0
def test_run_cell_command_writes_to_err():
    run_cell_method = MagicMock()
    result_value = ""
    run_cell_method.return_value = (False, result_value)
    spark_controller.run_command = run_cell_method

    command = "-s"
    name = "sessions_name"
    line = " ".join([command, name])
    cell = "cell code"

    result = magic.spark(line, cell)

    run_cell_method.assert_called_once_with(Command(cell), name)
    assert result is None
    ipython_display.send_error.assert_called_once_with(result_value)
Exemplo n.º 16
0
    def spark(self, line, cell="", local_ns=None):
        """Magic to execute spark remotely.

           This magic allows you to create a Livy Scala or Python session against a Livy endpoint. Every session can
           be used to execute either Spark code or SparkSQL code by executing against the SQL context in the session.
           When the SQL context is used, the result will be a Pandas dataframe of a sample of the results.

           If invoked with no subcommand, the cell will be executed against the specified session.

           Subcommands
           -----------
           info
               Display the available Livy sessions and other configurations for sessions.
           add
               Add a Livy session given a session name (-s), language (-l), and endpoint credentials.
               The -k argument, if present, will skip adding this session if it already exists.
               e.g. `%spark add -s test -l python -u https://sparkcluster.net/livy -a u -p -k`
           config
               Override the livy session properties sent to Livy on session creation. All session creations will
               contain these config settings from then on.
               Expected value is a JSON key-value string to be sent as part of the Request Body for the POST /sessions
               endpoint in Livy.
               e.g. `%%spark config`
                    `{"driverMemory":"1000M", "executorCores":4}`
           run
               Run Spark code against a session.
               e.g. `%%spark -s testsession` will execute the cell code against the testsession previously created
               e.g. `%%spark -s testsession -c sql` will execute the SQL code against the testsession previously created
               e.g. `%%spark -s testsession -c sql -o my_var` will execute the SQL code against the testsession
                        previously created and store the pandas dataframe created in the my_var variable in the
                        Python environment.
           logs
               Returns the logs for a given session.
               e.g. `%spark logs -s testsession` will return the logs for the testsession previously created
           delete
               Delete a Livy session.
               e.g. `%spark delete -s defaultlivy`
           cleanup
               Delete all Livy sessions created by the notebook. No arguments required.
               e.g. `%spark cleanup`
        """
        usage = "Please look at usage of %spark by executing `%spark?`."
        user_input = line
        args = parse_argstring_or_throw(self.spark, user_input)

        subcommand = args.command[0].lower()

        # info
        if subcommand == "info":
            if args.url is not None:
                endpoint = Endpoint(args.url, args.user, args.password)
                info_sessions = self.spark_controller.get_all_sessions_endpoint_info(endpoint)
                self.print_endpoint_info(info_sessions)
            else:
                self._print_local_info()
        # config
        elif subcommand == "config":
            conf.override(conf.session_configs.__name__, json.loads(cell))
        # add
        elif subcommand == "add":
            if args.url is None:
                self.ipython_display.send_error("Need to supply URL argument (e.g. -u https://example.com/livyendpoint)")
                return

            name = args.session
            language = args.language
            endpoint = Endpoint(args.url, args.user, args.password)
            skip = args.skip

            properties = conf.get_session_properties(language)

            self.spark_controller.add_session(name, endpoint, skip, properties)
        # delete
        elif subcommand == "delete":
            if args.session is not None:
                self.spark_controller.delete_session_by_name(args.session)
            elif args.url is not None:
                if args.id is None:
                    self.ipython_display.send_error("Must provide --id or -i option to delete session at endpoint from URL")
                    return
                endpoint = Endpoint(args.url, args.user, args.password)
                session_id = args.id
                self.spark_controller.delete_session_by_id(endpoint, session_id)
            else:
                self.ipython_display.send_error("Subcommand 'delete' requires a session name or a URL and session ID")
        # cleanup
        elif subcommand == "cleanup":
            if args.url is not None:
                endpoint = Endpoint(args.url, args.user, args.password)
                self.spark_controller.cleanup_endpoint(endpoint)
            else:
                self.spark_controller.cleanup()
        # logs
        elif subcommand == "logs":
            if args.session is None:
                self.ipython_display.send_error("Need to provide session argument (-s SESSION_NAME)")
                return
            self.ipython_display.write(self.spark_controller.get_logs(args.session))
        # run
        elif len(subcommand) == 0:
            if args.session is None:
                self.ipython_display.send_error("Need to provide session argument (-s SESSION_NAME)")
                return
            if args.context == CONTEXT_NAME_SPARK:
                (success, out) = self.spark_controller.run_command(Command(cell), args.session)
                if success:
                    self.ipython_display.write(out)
                else:
                    self.ipython_display.send_error(out)
            elif args.context == CONTEXT_NAME_SQL:
                return self.execute_sqlquery(cell, args.samplemethod, args.maxrows, args.samplefraction,
                                             args.session, args.output, args.quiet)
            else:
                self.ipython_display.send_error("Context '{}' not found".format(args.context))
        # error
        else:
            self.ipython_display.send_error("Subcommand '{}' not found. {}".format(subcommand, usage))