def test_execute_failure_wait_for_session_emits_event():
    """Check event emission when waiting for the session fails.

    ``session.wait_for_idle`` is replaced by a mock raising ``ValueError("yo")``.
    The test expects ``Command.execute`` to emit the statement start event,
    emit the statement end event with the failure details, and let the very
    same exception instance propagate.
    """
    spark_events = MagicMock()
    kind = SESSION_KIND_SPARK
    http_client = MagicMock()
    http_client.post_session.return_value = tls.TestLivySession.session_create_json
    http_client.post_statement.return_value = tls.TestLivySession.post_statement_json
    http_client.get_session.return_value = tls.TestLivySession.ready_sessions_json
    http_client.get_statement.return_value = tls.TestLivySession.ready_statement_json
    # The sleep overrides are applied around session creation and reset right after.
    conf.override_all({
        "status_sleep_seconds": 0.01,
        "statement_sleep_seconds": 0.01
    })
    session = _create_session(kind=kind, http_client=http_client)
    conf.override_all({})
    session.start(create_sql_context=False)
    session.wait_for_idle = MagicMock(side_effect=ValueError("yo"))
    command = Command("command", spark_events=spark_events)

    try:
        command.execute(session)
        assert False
    except ValueError as e:
        # Use the real ``assert_called_once_with``: the previously used
        # ``_assert_called_once_with`` is merely an auto-created child mock, so
        # calling it never verified anything.
        spark_events.emit_statement_execution_start_event.assert_called_once_with(
            session.guid, session.kind, session.id, command.guid)
        # The failure details belong to the *end* event, not the start event.
        spark_events.emit_statement_execution_end_event.assert_called_once_with(
            session.guid, session.kind, session.id, command.guid,
            -1, False, "ValueError", "yo")
        assert_equals(e, session.wait_for_idle.side_effect)
def test_execute_failure_post_statement_emits_event():
    """Check event emission when posting the statement fails.

    ``http_client.post_statement`` raises ``KeyError``. The test expects
    ``Command.execute`` to emit the start event, emit one end event describing
    the failure, and propagate the very same exception instance.
    """
    spark_events = MagicMock()
    kind = SESSION_KIND_SPARK
    http_client = MagicMock()
    http_client.get_statement.return_value = tls.TestLivySession.ready_statement_json
    conf.override_all({"status_sleep_seconds": 0.01, "statement_sleep_seconds": 0.01})
    session = _create_session(kind=kind, http_client=http_client)
    session.wait_for_idle = MagicMock()
    conf.override_all({})
    session.start()
    session.wait_for_idle = MagicMock()
    command = Command("command", spark_events=spark_events)
    http_client.post_statement.side_effect = KeyError("Something bad happened here")

    try:
        command.execute(session)
        assert False
    except KeyError as e:
        spark_events.emit_statement_execution_start_event.assert_called_once_with(
            session.guid, session.kind, session.id, command.guid
        )
        # The original called ``_assert_called_once_with`` here, which is only
        # an auto-created child mock and therefore checked nothing.
        end_event = spark_events.emit_statement_execution_end_event
        assert_equals(end_event.call_count, 1)
        end_args = end_event.call_args[0]
        assert_equals(
            end_args[:7],
            (session.guid, session.kind, session.id, command.guid, -1, False, "KeyError"),
        )
        # str(KeyError(msg)) wraps msg in quotes, so match the message by
        # substring instead of equality.
        # NOTE(review): tighten once the exact message format used by
        # Command.execute is confirmed.
        assert "Something bad happened here" in end_args[7]
        assert_equals(e, http_client.post_statement.side_effect)
def test_execute():
    """Run a command against a mocked Livy client and check results and events.

    Covers the plain-text result first, then swaps the statement response for
    one carrying a base64 PNG payload and checks that an ``Image`` is returned.
    """
    events = MagicMock()
    client = MagicMock()
    client.post_session.return_value = tls.TestLivySession.session_create_json
    client.post_statement.return_value = tls.TestLivySession.post_statement_json
    client.get_session.return_value = tls.TestLivySession.ready_sessions_json
    client.get_statement.return_value = tls.TestLivySession.ready_statement_json
    session = _create_session(kind=SESSION_KIND_SPARK, http_client=client)
    session.start()
    command = Command("command", spark_events=events)

    result = command.execute(session)

    client.post_statement.assert_called_with(0, {"code": command.code})
    client.get_statement.assert_called_with(0, 0)
    assert result[0]
    assert_equals(tls.TestLivySession.pi_result, result[1])
    events.emit_statement_execution_start_event.assert_called_once_with(
        session.guid, session.kind, session.id, command.guid)
    events.emit_statement_execution_end_event.assert_called_once_with(
        session.guid, session.kind, session.id, command.guid, 0, True, "", "")

    # Now try with PNG result:
    png_statement = {
        "id": 0,
        "state": "available",
        "output": {
            "status": "ok",
            "execution_count": 0,
            "data": {
                "text/plain": "",
                "image/png": b64encode(b"hello"),
            },
        },
    }
    client.get_statement.return_value = png_statement
    result = command.execute(session)
    assert result[0]
    assert isinstance(result[1], Image)
    assert result[1].data == b"hello"
def test_pyspark_livy_sql_options_spark2():
    """Check the PySpark code SQLQuery generates for each sampling option.

    Every case builds a query with the given options, asks for the command
    using the ``"spark"`` context variable, and compares it to the expected code.
    """
    query = "abc"
    cases = [
        (dict(samplemethod='take', maxrows=120),
         u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).take(120): print({})'),
        (dict(samplemethod='take', maxrows=-1),
         u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).collect(): print({})'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).sample(False, 0.25).collect(): '
         u'print({})'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).sample(False, 0.33).take(3234): '
         u'print({})'),
    ]

    for options, template in cases:
        sqlquery = SQLQuery(query, **options)
        assert_equals(
            sqlquery._pyspark_command("spark"),
            Command(template.format(LONG_RANDOM_VARIABLE_NAME, query, LONG_RANDOM_VARIABLE_NAME)))
def test_r_livy_sampling_options():
    """Check the R code SparkStoreCommand generates for each sampling option."""
    variable_name = "abc"
    cases = [
        (dict(samplemethod='take', maxrows=100),
         'for ({} in (jsonlite::toJSON(take({},100)))) {{cat({})}}'),
        (dict(samplemethod='take', maxrows=-1),
         'for ({} in (jsonlite::toJSON(collect({})))) {{cat({})}}'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         'for ({} in (jsonlite::toJSON(collect(sample({}, FALSE, 0.25))))) {{cat({})}}'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         'for ({} in (jsonlite::toJSON(take(sample({}, FALSE, 0.33),3234)))) {{cat({})}}'),
        # An unspecified sample method is expected to produce the same code as 'take'.
        (dict(samplemethod=None, maxrows=100),
         'for ({} in (jsonlite::toJSON(take({},100)))) {{cat({})}}'),
    ]

    for options, template in cases:
        sparkcommand = SparkStoreCommand(variable_name, **options)
        assert_equals(
            sparkcommand._r_command(variable_name),
            Command(template.format(LONG_RANDOM_VARIABLE_NAME, variable_name, LONG_RANDOM_VARIABLE_NAME)))
def test_execute_failure_wait_for_session_emits_event():
    """Check event emission when waiting for the session fails.

    ``session.wait_for_idle`` is replaced by a mock raising ``ValueError("yo")``.
    The test expects ``Command.execute`` to emit the statement start event,
    emit the statement end event with the failure details, and let the very
    same exception instance propagate.
    """
    spark_events = MagicMock()
    kind = SESSION_KIND_SPARK
    http_client = MagicMock()
    http_client.post_session.return_value = tls.TestLivySession.session_create_json
    http_client.post_statement.return_value = tls.TestLivySession.post_statement_json
    http_client.get_session.return_value = tls.TestLivySession.ready_sessions_json
    http_client.get_statement.return_value = tls.TestLivySession.ready_statement_json
    # The sleep overrides are applied around session creation and reset right after.
    conf.override_all({
        "status_sleep_seconds": 0.01,
        "statement_sleep_seconds": 0.01
    })
    session = _create_session(kind=kind, http_client=http_client)
    conf.override_all({})
    session.start(create_sql_context=False)
    session.wait_for_idle = MagicMock(side_effect=ValueError("yo"))
    command = Command("command", spark_events=spark_events)

    try:
        command.execute(session)
        assert False
    except ValueError as e:
        # ``_assert_called_once_with`` (leading underscore) is just an
        # auto-created child mock and verifies nothing; call the real
        # assertion method instead.
        spark_events.emit_statement_execution_start_event.assert_called_once_with(
            session.guid, session.kind, session.id, command.guid)
        # Failure details are reported through the end event, so assert on it
        # rather than on the start event a second time.
        spark_events.emit_statement_execution_end_event.assert_called_once_with(
            session.guid, session.kind, session.id, command.guid,
            -1, False, "ValueError", "yo")
        assert_equals(e, session.wait_for_idle.side_effect)
def test_scala_livy_sql_options():
    """Check the Scala code SQLQuery generates for each sampling option."""
    query = "abc"
    cases = [
        (dict(samplemethod='take', maxrows=100),
         'sqlContext.sql("""{}""").toJSON.take(100).foreach(println)'),
        (dict(samplemethod='take', maxrows=-1),
         'sqlContext.sql("""{}""").toJSON.collect.foreach(println)'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         'sqlContext.sql("""{}""").toJSON.sample(false, 0.25).collect.foreach(println)'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         'sqlContext.sql("""{}""").toJSON.sample(false, 0.33).take(3234).foreach(println)'),
    ]

    for options, template in cases:
        sqlquery = SQLQuery(query, **options)
        assert_equals(sqlquery._scala_command(), Command(template.format(query)))
def test_execute():
    """Run a command against a mocked Livy client and check results and events."""
    events = MagicMock()
    client = MagicMock()
    client.post_session.return_value = tls.TestLivySession.session_create_json
    client.post_statement.return_value = tls.TestLivySession.post_statement_json
    client.get_session.return_value = tls.TestLivySession.ready_sessions_json
    client.get_statement.return_value = tls.TestLivySession.ready_statement_json

    # Sleep overrides are applied around session creation and reset right after.
    sleep_overrides = {
        "status_sleep_seconds": 0.01,
        "statement_sleep_seconds": 0.01
    }
    conf.override_all(sleep_overrides)
    session = _create_session(kind=SESSION_KIND_SPARK, http_client=client)
    conf.override_all({})
    session.start()
    command = Command("command", spark_events=events)

    result = command.execute(session)

    client.post_statement.assert_called_with(0, {"code": command.code})
    client.get_statement.assert_called_with(0, 0)
    assert result[0]
    assert_equals(tls.TestLivySession.pi_result, result[1])
    events.emit_statement_execution_start_event.assert_called_once_with(
        session.guid, session.kind, session.id, command.guid)
    events.emit_statement_execution_end_event.assert_called_once_with(
        session.guid, session.kind, session.id, command.guid, 0, True, "", "")
def test_execute_failure_get_statement_output_emits_event():
    """Check event emission when fetching the statement output fails.

    ``command._get_statement_output`` is replaced by a mock raising
    ``AttributeError('OHHHH')``. The test expects ``Command.execute`` to emit
    the start event, emit one end event describing the failure, and propagate
    the very same exception instance.
    """
    spark_events = MagicMock()
    kind = SESSION_KIND_SPARK
    http_client = MagicMock()
    conf.override_all({
        "status_sleep_seconds": 0.01,
        "statement_sleep_seconds": 0.01
    })
    session = _create_session(kind=kind, http_client=http_client)
    session.wait_for_idle = MagicMock()
    conf.override_all({})
    session.start(create_sql_context=False)
    session.wait_for_idle = MagicMock()
    command = Command("command", spark_events=spark_events)
    command._get_statement_output = MagicMock(
        side_effect=AttributeError('OHHHH'))

    try:
        command.execute(session)
        assert False
    except AttributeError as e:
        # ``_assert_called_once_with`` (leading underscore) is only an
        # auto-created child mock and verified nothing; use the real assertion.
        spark_events.emit_statement_execution_start_event.assert_called_once_with(
            session.guid, session.kind, session.id, command.guid)
        # Failure details are reported through the end event, which the
        # original never inspected (it addressed the start event twice).
        end_event = spark_events.emit_statement_execution_end_event
        assert_equals(end_event.call_count, 1)
        end_args = end_event.call_args[0]
        assert_equals(
            end_args[:4],
            (session.guid, session.kind, session.id, command.guid))
        # NOTE(review): the statement id (position 4) is deliberately not
        # pinned. The original expectation listed -1, but post_statement is an
        # unconfigured MagicMock here, so the id execute() reports may come
        # from that mock's response -- confirm against Command.execute.
        assert_equals(end_args[5:], (False, "AttributeError", "OHHHH"))
        assert_equals(e, command._get_statement_output.side_effect)
def test_execute_waiting():
    """Check that execute returns the result after statement polls that report waiting.

    ``get_statement`` answers with two waiting responses before the ready ones.
    """
    events = MagicMock()
    client = MagicMock()
    client.post_session.return_value = tls.TestLivySession.session_create_json
    client.post_statement.return_value = tls.TestLivySession.post_statement_json
    client.get_session.return_value = tls.TestLivySession.ready_sessions_json
    waiting = tls.TestLivySession.waiting_statement_json
    ready = tls.TestLivySession.ready_statement_json
    client.get_statement.side_effect = [waiting, waiting, ready, ready]

    session = _create_session(kind=SESSION_KIND_SPARK, http_client=client)
    session.start()
    command = Command("command", spark_events=events)

    result = command.execute(session)

    client.post_statement.assert_called_with(0, {"code": command.code})
    client.get_statement.assert_called_with(0, 0)
    assert result[0]
    assert_equals(tls.TestLivySession.pi_result, result[1])
    assert_equals(MIMETYPE_TEXT_PLAIN, result[2])
    events.emit_statement_execution_start_event.assert_called_once_with(
        session.guid, session.kind, session.id, command.guid)
    events.emit_statement_execution_end_event.assert_called_once_with(
        session.guid, session.kind, session.id, command.guid, 0, True, "", "")
def test_execute_failure_post_statement_emits_event():
    """Check event emission when posting the statement fails.

    ``http_client.post_statement`` raises ``KeyError``. The test expects
    ``Command.execute`` to emit the start event, emit one end event describing
    the failure, and propagate the very same exception instance.
    """
    spark_events = MagicMock()
    kind = SESSION_KIND_SPARK
    http_client = MagicMock()
    http_client.get_statement.return_value = tls.TestLivySession.ready_statement_json
    conf.override_all({
        "status_sleep_seconds": 0.01,
        "statement_sleep_seconds": 0.01
    })
    session = _create_session(kind=kind, http_client=http_client)
    session.wait_for_idle = MagicMock()
    conf.override_all({})
    session.start()
    session.wait_for_idle = MagicMock()
    command = Command("command", spark_events=spark_events)
    http_client.post_statement.side_effect = KeyError(
        'Something bad happened here')

    try:
        command.execute(session)
        assert False
    except KeyError as e:
        spark_events.emit_statement_execution_start_event.assert_called_once_with(
            session.guid, session.kind, session.id, command.guid)
        # The original used ``_assert_called_once_with`` for the end event;
        # that name is only an auto-created child mock, so nothing was checked.
        end_event = spark_events.emit_statement_execution_end_event
        assert_equals(end_event.call_count, 1)
        end_args = end_event.call_args[0]
        assert_equals(
            end_args[:7],
            (session.guid, session.kind, session.id, command.guid,
             -1, False, "KeyError"))
        # str(KeyError(msg)) wraps msg in quotes, so compare by substring.
        # NOTE(review): tighten once the exact message format used by
        # Command.execute is confirmed.
        assert "Something bad happened here" in end_args[7]
        assert_equals(e, http_client.post_statement.side_effect)
def test_pyspark_livy_sql_options():
    """Check the PySpark code SQLQuery generates for each sampling option.

    The expected code encodes each row with ``conf.pyspark_sql_encoding()``.
    """
    query = "abc"
    cases = [
        (dict(samplemethod='take', maxrows=120),
         u'for {} in sqlContext.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'),
        (dict(samplemethod='take', maxrows=-1),
         u'for {} in sqlContext.sql(u"""{} """).toJSON().collect(): print({}.encode("{}"))'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         u'for {} in sqlContext.sql(u"""{} """).toJSON().sample(False, 0.25).collect(): '
         u'print({}.encode("{}"))'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         u'for {} in sqlContext.sql(u"""{} """).toJSON().sample(False, 0.33).take(3234): '
         u'print({}.encode("{}"))'),
    ]

    for options, template in cases:
        sqlquery = SQLQuery(query, **options)
        assert_equals(
            sqlquery._pyspark_command(),
            Command(template.format(LONG_RANDOM_VARIABLE_NAME, query,
                                    LONG_RANDOM_VARIABLE_NAME,
                                    conf.pyspark_sql_encoding())))
def test_execute_failure_get_statement_output_emits_event():
    """Check event emission when fetching the statement output fails.

    ``command._get_statement_output`` is replaced by a mock raising
    ``AttributeError('OHHHH')``. The test expects ``Command.execute`` to emit
    the start event, emit one end event describing the failure, and propagate
    the very same exception instance.
    """
    spark_events = MagicMock()
    kind = SESSION_KIND_SPARK
    http_client = MagicMock()
    conf.override_all({
        "status_sleep_seconds": 0.01,
        "statement_sleep_seconds": 0.01
    })
    session = _create_session(kind=kind, http_client=http_client)
    session.wait_for_idle = MagicMock()
    conf.override_all({})
    session.start(create_sql_context=False)
    session.wait_for_idle = MagicMock()
    command = Command("command", spark_events=spark_events)
    command._get_statement_output = MagicMock(side_effect=AttributeError('OHHHH'))

    try:
        command.execute(session)
        assert False
    except AttributeError as e:
        # The real assertion method has no leading underscore; the
        # ``_assert_called_once_with`` spelling is an auto-created child mock
        # that silently accepts any call.
        spark_events.emit_statement_execution_start_event.assert_called_once_with(
            session.guid, session.kind, session.id, command.guid)
        # Inspect the end event (the original addressed the start event twice).
        end_event = spark_events.emit_statement_execution_end_event
        assert_equals(end_event.call_count, 1)
        end_args = end_event.call_args[0]
        assert_equals(
            end_args[:4],
            (session.guid, session.kind, session.id, command.guid))
        # NOTE(review): the statement id (position 4) is deliberately not
        # pinned. The original expectation listed -1, but post_statement is an
        # unconfigured MagicMock here, so the id execute() reports may come
        # from that mock's response -- confirm against Command.execute.
        assert_equals(end_args[5:], (False, "AttributeError", "OHHHH"))
        assert_equals(e, command._get_statement_output.side_effect)
def test_execute():
    """Run a command against a mocked Livy client and check results and events."""
    events = MagicMock()
    client = MagicMock()
    client.post_session.return_value = tls.TestLivySession.session_create_json
    client.post_statement.return_value = tls.TestLivySession.post_statement_json
    client.get_session.return_value = tls.TestLivySession.ready_sessions_json
    client.get_statement.return_value = tls.TestLivySession.ready_statement_json

    # Sleep overrides are applied around session creation and reset right after.
    conf.override_all({"status_sleep_seconds": 0.01, "statement_sleep_seconds": 0.01})
    session = _create_session(kind=SESSION_KIND_SPARK, http_client=client)
    conf.override_all({})
    session.start()
    command = Command("command", spark_events=events)

    result = command.execute(session)

    client.post_statement.assert_called_with(0, {"code": command.code})
    client.get_statement.assert_called_with(0, 0)
    assert result[0]
    assert_equals(tls.TestLivySession.pi_result, result[1])

    start_event = events.emit_statement_execution_start_event
    start_event.assert_called_once_with(
        session.guid, session.kind, session.id, command.guid
    )
    end_event = events.emit_statement_execution_end_event
    end_event.assert_called_once_with(
        session.guid, session.kind, session.id, command.guid, 0, True, "", ""
    )
def matplot(self, line, cell="", local_ns=None):
    # Runs "%matplot <line>" as a command in the session looked up by
    # self.session_name and shows the outcome through that session's
    # ipython_display: display() on success, send_error() on failure.
    # (Comments are used instead of a docstring so the function's __doc__
    # stays untouched.)
    session = self.spark_controller.get_session_by_name_or_default(self.session_name)
    success, out, _mimetype = Command("%matplot " + line).execute(session)
    if not success:
        session.ipython_display.send_error(out)
        return
    session.ipython_display.display(out)
def test_unicode():
    """Check that a non-ASCII variable name passes through to the generated code."""
    variable_name = u"collect 'è'"
    store = SparkStoreCommand(variable_name, samplemethod='take', maxrows=120)

    pyspark_template = u'for {} in {}.toJSON().take(120): print({}.encode("{}"))'
    assert_equals(
        store._pyspark_command(variable_name),
        Command(pyspark_template.format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                        LONG_RANDOM_VARIABLE_NAME,
                                        conf.pyspark_dataframe_encoding())))

    scala_template = u'{}.toJSON.take(120).foreach(println)'
    assert_equals(
        store._scala_command(variable_name),
        Command(scala_template.format(variable_name)))
def test_unicode():
    """Check that a non-ASCII variable name passes through to the generated code."""
    variable_name = u"collect 'è'"
    store = SparkStoreCommand(variable_name, samplemethod='take', maxrows=120)

    pyspark_template = (
        u'import sys\nfor {} in {}.toJSON(use_unicode=(sys.version_info.major > 2)).take(120): print({})'
    )
    assert_equals(
        store._pyspark_command(variable_name),
        Command(pyspark_template.format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                        LONG_RANDOM_VARIABLE_NAME)))

    scala_template = u'{}.toJSON.take(120).foreach(println)'
    assert_equals(
        store._scala_command(variable_name),
        Command(scala_template.format(variable_name)))
def test_unicode_sql():
    """Check that a non-ASCII SQL query passes through to the PySpark, Scala and R code."""
    query = u"SELECT 'è'"
    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)

    pyspark_template = u'for {} in spark.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'
    assert_equals(
        sqlquery._pyspark_command("spark"),
        Command(pyspark_template.format(LONG_RANDOM_VARIABLE_NAME, query,
                                        LONG_RANDOM_VARIABLE_NAME,
                                        conf.pyspark_dataframe_encoding())))

    scala_template = u'spark.sql("""{}""").toJSON.take(120).foreach(println)'
    assert_equals(
        sqlquery._scala_command("spark"),
        Command(scala_template.format(query)))

    r_template = u'for ({} in (jsonlite:::toJSON(take(sql("{}"),120)))) {{cat({})}}'
    assert_equals(
        sqlquery._r_command("spark"),
        Command(r_template.format(LONG_RANDOM_VARIABLE_NAME, query,
                                  LONG_RANDOM_VARIABLE_NAME)))
def execute_spark(self, cell, output_var, samplemethod, maxrows, samplefraction, session_name, coerce):
    """Run ``cell`` in the named session, show the outcome, and optionally store a result.

    On failure the error is sent to the display, unless
    ``conf.spark_statement_errors_are_fatal()`` is set, in which case
    ``SparkStatementException`` is raised (after cleaning up the controller
    when ``conf.shutdown_session_on_spark_statement_errors()`` is set).

    On success, string output is rendered as HTML or plain text depending on
    the mimetype and any other output goes to ``display``. If ``output_var``
    is given, a store command built from the sampling arguments is then run
    and its result is bound to ``output_var`` in the user namespace.
    """
    success, out, mimetype = self.spark_controller.run_command(Command(cell), session_name)

    if not success:
        if conf.spark_statement_errors_are_fatal():
            if conf.shutdown_session_on_spark_statement_errors():
                self.spark_controller.cleanup()
            raise SparkStatementException(out)
        self.ipython_display.send_error(out)
        return

    if not isinstance(out, string_types):
        self.ipython_display.display(out)
    elif mimetype == MIMETYPE_TEXT_HTML:
        self.ipython_display.html(out)
    else:
        self.ipython_display.write(out)

    if output_var is None:
        return

    store_command = self._spark_store_command(
        output_var, samplemethod, maxrows, samplefraction, coerce)
    self.shell.user_ns[output_var] = self.spark_controller.run_command(
        store_command, session_name)
def test_run_spark_with_store_correct_calls():
    """Check that ``-o`` with ``--coerce`` runs the cell and then the matching store command."""
    run_cell_method = MagicMock(return_value=(True, ""))
    spark_controller.run_command = run_cell_method

    name = "sessions_name"
    method_name = "sample"
    output_var = "var_name"
    # Flag/value pairs, joined with single spaces into the magic's argument line.
    line = " ".join([
        "-s", name,
        "-c", "spark",
        "-m", method_name,
        "-o", output_var,
        "--coerce", "True",
    ])
    cell = "cell code"

    magic.spark(line, cell)

    run_cell_method.assert_any_call(Command(cell), name)
    expected_store = SparkStoreCommand(output_var, samplemethod=method_name, coerce=True)
    run_cell_method.assert_any_call(expected_store, name)
def _scala_command(self, input_variable_name, input_variable_value, output_variable_name):
    """Build the Scala command assigning the input value to the output variable.

    The value is first checked with ``_assert_input_is_string_type`` and then
    embedded in a triple-quoted Scala string literal.
    """
    self._assert_input_is_string_type(input_variable_name, input_variable_value)
    template = u'var {} = """{}"""'
    return Command(template.format(output_variable_name, input_variable_value))
def _pyspark_command(self, input_variable_name, input_variable_value, output_variable_name):
    """Build the PySpark command assigning the input value to the output variable.

    The value is first checked with ``_assert_input_is_string_type``; its
    ``repr`` is used as the right-hand side of the assignment.
    """
    self._assert_input_is_string_type(input_variable_name, input_variable_value)
    value_literal = repr(input_variable_value)
    return Command(u'{} = {}'.format(output_variable_name, value_literal))
def test_run_spark_command_exception_while_storing():
    """Check error reporting when the cell succeeds but the store command raises."""
    exception = LivyUnexpectedStatusException('WOW')
    # First call (the cell) succeeds; second call (the store command) raises.
    run_cell_method = MagicMock(side_effect=[(True, ""), exception])
    spark_controller.run_command = run_cell_method

    name = "sessions_name"
    method_name = "sample"
    output_var = "var_name"
    line = " ".join([
        "-s", name,
        "-c", "spark",
        "-m", method_name,
        "-o", output_var,
    ])
    cell = "cell code"

    magic.spark(line, cell)

    run_cell_method.assert_any_call(Command(cell), name)
    run_cell_method.assert_any_call(
        SparkStoreCommand(output_var, samplemethod=method_name), name)
    ipython_display.write.assert_called_once_with("")
    ipython_display.send_error.assert_called_once_with(
        EXPECTED_ERROR_MSG.format(exception))
def test_execute_interrupted():
    """Check how ``Command.execute`` reports a ``KeyboardInterrupt`` from ``post_statement``.

    The raised exception must be a ``SparkStatementCancelledException`` whose
    text is ``COMMAND_INTERRUPTED_MSG``. The test also checks that the IPython
    object's ``_showtraceback`` has been replaced by
    ``SparkStatementCancelledException._show_tb``: other exceptions are handed
    to the previous handler with nothing written to stderr, while the
    cancellation exception is not handed on and its message is written to stderr.
    """
    spark_events = MagicMock()
    http_client = MagicMock()
    http_client.get_statement.return_value = tls.TestLivySession.ready_statement_json
    session = _create_session(kind=SESSION_KIND_SPARK, http_client=http_client)
    session.wait_for_idle = MagicMock()
    session.start()
    session.wait_for_idle = MagicMock()
    command = Command("command", spark_events=spark_events)

    mock_show_tb = MagicMock()
    mock_ipython = MagicMock()
    mock_ipython._showtraceback = mock_show_tb

    def mock_get_ipython():
        return mock_ipython

    sparkmagic.livyclientlib.exceptions.get_ipython = mock_get_ipython
    http_client.post_statement.side_effect = KeyboardInterrupt("")

    try:
        command.execute(session)
        assert False
    except KeyboardInterrupt as e:
        spark_events.emit_statement_execution_start_event.assert_called_once_with(
            session.guid, session.kind, session.id, command.guid)
        # NOTE(review): `_assert_called_once_with` (leading underscore) is an
        # auto-created child mock, so this line verifies nothing. Kept as-is
        # here; confirm the expected end-event arguments before enabling it.
        spark_events.emit_statement_execution_end_event._assert_called_once_with(
            session.guid, session.kind, session.id, command.guid,
            -1, False, "KeyboardInterrupt", "")
        assert isinstance(e, SparkStatementCancelledException)
        assert_equals(str(e), COMMAND_INTERRUPTED_MSG)

        # Test patching _showtraceback()
        assert mock_ipython._showtraceback is SparkStatementCancelledException._show_tb

        with _capture_stderr() as stderr:
            mock_ipython._showtraceback(KeyError, "Dummy KeyError", MagicMock())
            mock_show_tb.assert_called_once()
            assert not stderr.getvalue()

        with _capture_stderr() as stderr:
            mock_ipython._showtraceback(
                SparkStatementCancelledException, COMMAND_INTERRUPTED_MSG, MagicMock())
            mock_show_tb.assert_called_once()  # still once
            assert_equals(stderr.getvalue().strip(), COMMAND_INTERRUPTED_MSG)
    except:
        assert False
    else:
        assert False
def _r_command(self, input_variable_name, input_variable_value, output_variable_name):
    """Build the R command assigning the input value to the output variable.

    The value is first checked with ``_assert_input_is_string_type``.
    Backslashes are doubled and double quotes are backslash-escaped (in that
    order) before the value is placed inside a double-quoted R string.
    """
    self._assert_input_is_string_type(input_variable_name, input_variable_value)
    # Escape backslashes first so the backslashes added for quotes are not doubled.
    escaped = input_variable_value.replace(u'\\', u'\\\\')
    escaped = escaped.replace(u'"', u'\\"')
    return Command(u'''assign("{}","{}")'''.format(output_variable_name, escaped))
def test_spark_expected_exception():
    """Check that a SessionManagementException is reported with EXPECTED_ERROR_MSG."""
    cell = "some spark code"
    error = SessionManagementException('oups')
    spark_controller.run_command = MagicMock(side_effect=error)

    magic.spark("", cell)

    spark_controller.run_command.assert_called_once_with(Command(cell), None)
    expected_message = constants.EXPECTED_ERROR_MSG.format(
        spark_controller.run_command.side_effect)
    ipython_display.send_error.assert_called_once_with(expected_message)
def test_r_livy_sql_options_spark2():
    """Check the R code SQLQuery generates for each sampling option."""
    query = "abc"
    cases = [
        (dict(samplemethod='take', maxrows=100),
         'for ({} in (jsonlite:::toJSON(take(sql("{}"),100)))) {{cat({})}}'),
        (dict(samplemethod='take', maxrows=-1),
         'for ({} in (jsonlite:::toJSON(collect(sql("{}"))))) {{cat({})}}'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         'for ({} in (jsonlite:::toJSON(collect(sample(sql("{}"), FALSE, 0.25))))) {{cat({})}}'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         'for ({} in (jsonlite:::toJSON(take(sample(sql("{}"), FALSE, 0.33),3234)))) {{cat({})}}'),
    ]

    for options, template in cases:
        sqlquery = SQLQuery(query, **options)
        assert_equals(
            sqlquery._r_command("spark"),
            Command(template.format(LONG_RANDOM_VARIABLE_NAME, query, LONG_RANDOM_VARIABLE_NAME)))
def test_spark_unexpected_exception():
    """Check that a generic Exception is reported with INTERNAL_ERROR_MSG."""
    cell = "some spark code"
    error = Exception('oups')
    spark_controller.run_command = MagicMock(side_effect=error)

    magic.spark("", cell)

    spark_controller.run_command.assert_called_once_with(Command(cell))
    expected_message = constants.INTERNAL_ERROR_MSG.format(
        spark_controller.run_command.side_effect)
    ipython_display.send_error.assert_called_once_with(expected_message)
def test_unicode_sql():
    """Check non-ASCII SQL in the PySpark and Scala code, and that R is not implemented."""
    query = u"SELECT 'è'"
    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)

    pyspark_template = u'for {} in sqlContext.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'
    assert_equals(
        sqlquery._pyspark_command(),
        Command(pyspark_template.format(LONG_RANDOM_VARIABLE_NAME, query,
                                        LONG_RANDOM_VARIABLE_NAME,
                                        conf.pyspark_sql_encoding())))

    scala_template = u'sqlContext.sql("""{}""").toJSON.take(120).foreach(println)'
    assert_equals(sqlquery._scala_command(), Command(scala_template.format(query)))

    # The R variant must raise NotImplementedError; returning normally is a failure.
    try:
        sqlquery._r_command()
    except NotImplementedError:
        pass
    else:
        assert False
def _scala_command(self, spark_context_variable_name):
    """Build the Scala command that prints the rows of the given variable as JSON.

    The call chain starts at ``<variable>.toJSON``, adds ``sample(false,
    <samplefraction>)`` when ``samplemethod`` is ``'sample'``, then either
    ``take(<maxrows>)`` when ``maxrows`` is non-negative or ``collect``
    otherwise, and ends with ``foreach(println)``.
    """
    chain = u'{}.toJSON'.format(spark_context_variable_name)
    if self.samplemethod == u'sample':
        chain = u'{}.sample(false, {})'.format(chain, self.samplefraction)
    chain = (u'{}.take({})'.format(chain, self.maxrows)
             if self.maxrows >= 0
             else u'{}.collect'.format(chain))
    return Command(u'{}.foreach(println)'.format(chain))
def test_unicode_sql():
    """Check that a non-ASCII SQL query passes through to the PySpark, Scala and R code."""
    query = u"SELECT 'è'"
    longvar = LONG_RANDOM_VARIABLE_NAME
    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)

    pyspark_template = (
        u'import sys\nfor {} in spark.sql(u"""{} """).toJSON(use_unicode=(sys.version_info.major > 2)).take(120): print({})'
    )
    assert_equals(
        sqlquery._pyspark_command("spark"),
        Command(pyspark_template.format(longvar, query, longvar)))

    scala_template = u'spark.sql("""{}""").toJSON.take(120).foreach(println)'
    assert_equals(
        sqlquery._scala_command("spark"),
        Command(scala_template.format(query)))

    r_template = u'for ({} in (jsonlite:::toJSON(take(sql("{}"),120)))) {{cat({})}}'
    assert_equals(
        sqlquery._r_command("spark"),
        Command(r_template.format(longvar, query, longvar)))
def test_scala_livy_sampling_options():
    """Check the Scala code SparkStoreCommand generates for each sampling option."""
    variable_name = "abc"
    cases = [
        (dict(samplemethod='take', maxrows=100),
         '{}.toJSON.take(100).foreach(println)'),
        (dict(samplemethod='take', maxrows=-1),
         '{}.toJSON.collect.foreach(println)'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         '{}.toJSON.sample(false, 0.25).collect.foreach(println)'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         '{}.toJSON.sample(false, 0.33).take(3234).foreach(println)'),
        # An unspecified sample method is expected to produce the same code as 'take'.
        (dict(samplemethod=None, maxrows=100),
         '{}.toJSON.take(100).foreach(println)'),
    ]

    for options, template in cases:
        sparkcommand = SparkStoreCommand(variable_name, **options)
        assert_equals(
            sparkcommand._scala_command(variable_name),
            Command(template.format(variable_name)))
def spark(self, line, cell="", local_ns=None):
    # Parses `line`, then -- if self._do_not_call_start_session(u"") is truthy --
    # runs `cell` as a command and writes the output on success or sends it as
    # an error otherwise. Returns None in every case.
    # (Comments are used instead of a docstring so the function's __doc__
    # stays untouched.)
    parse_argstring_or_throw(self.spark, line)
    if not self._do_not_call_start_session(u""):
        return None

    success, out = self.spark_controller.run_command(Command(cell))
    report = self.ipython_display.write if success else self.ipython_display.send_error
    report(out)
def test_spark_expected_exception_in_storing():
    """Check error reporting when the cell succeeds but storing raises SessionManagementException."""
    cell = "some spark code"
    store_error = SessionManagementException('oups')
    # First call (the cell) succeeds; second call (the store command) raises.
    side_effect = [(True, 'ok', constants.MIMETYPE_TEXT_PLAIN), store_error]
    spark_controller.run_command = MagicMock(side_effect=side_effect)

    magic.spark("-o var_name", cell)

    assert spark_controller.run_command.call_count == 2
    spark_controller.run_command.assert_any_call(Command(cell), None)
    expected_message = constants.EXPECTED_ERROR_MSG.format(side_effect[1])
    ipython_display.send_error.assert_called_with(expected_message)