# Imports assumed for these test snippets, following the layout of the
# sparkmagic test suite; exact module paths may differ between versions.
from mock import MagicMock
from nose.tools import assert_equals, assert_raises

import sparkmagic.utils.configuration as conf
from sparkmagic.utils.constants import LONG_RANDOM_VARIABLE_NAME, MIMETYPE_TEXT_PLAIN
from sparkmagic.livyclientlib.command import Command
from sparkmagic.livyclientlib.sparkstorecommand import SparkStoreCommand
from sparkmagic.livyclientlib.exceptions import BadUserDataException, LivyUnexpectedStatusException
# magic, spark_controller, ipython_display and EXPECTED_ERROR_MSG are module-level
# test fixtures set up elsewhere in the suite and are not imported here.


def test_unicode():
    """_pyspark_command and _scala_command should handle a unicode variable name."""
    variable_name = u"collect 'è'"

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='take', maxrows=120)
    assert_equals(sparkcommand._pyspark_command(variable_name),
                  Command(u'import sys\nfor {} in {}.toJSON(use_unicode=(sys.version_info.major > 2)).take(120): print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                  LONG_RANDOM_VARIABLE_NAME)))
    assert_equals(sparkcommand._scala_command(variable_name),
                  Command(u'{}.toJSON.take(120).foreach(println)'.format(variable_name)))
def test_unicode_with_encoding():
    """Same unicode check against the older PySpark command format, which
    encodes each JSON row with conf.pyspark_dataframe_encoding()."""
    variable_name = u"collect 'è'"

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='take', maxrows=120)
    assert_equals(sparkcommand._pyspark_command(variable_name),
                  Command(u'for {} in {}.toJSON().take(120): print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_dataframe_encoding())))
    assert_equals(sparkcommand._scala_command(variable_name),
                  Command(u'{}.toJSON.take(120).foreach(println)'.format(variable_name)))
def test_execute_code():
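    """execute() should build the kind-specific command via to_command() and
    run it against the given session."""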
    spark_events = MagicMock()
    variable_name = "abc"

    sparkcommand = SparkStoreCommand(variable_name, "take", 100, 0.2, spark_events=spark_events)
    sparkcommand.to_command = MagicMock(return_value=MagicMock())
    result = """{"z":100, "nullv":null, "y":50}
{"z":25, "nullv":null, "y":10}"""
    sparkcommand.to_command.return_value.execute = MagicMock(return_value=(True, result, MIMETYPE_TEXT_PLAIN))
    session = MagicMock()
    session.kind = "pyspark"
    result = sparkcommand.execute(session)
    
    sparkcommand.to_command.assert_called_once_with(session.kind, variable_name)
    sparkcommand.to_command.return_value.execute.assert_called_once_with(session)
def _spark_store_command(output_var, samplemethod, maxrows, samplefraction,
                         coerce):
    """Build a SparkStoreCommand for the given output variable and sampling options."""
    return SparkStoreCommand(output_var,
                             samplemethod,
                             maxrows,
                             samplefraction,
                             coerce=coerce)
def test_run_spark_command_exception_while_storing():
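    """If storing the output variable fails with a LivyUnexpectedStatusException,
    the cell output is still written and the error is surfaced through
    ipython_display.send_error(). Relies on the module-level magic,
    spark_controller and ipython_display test doubles set up elsewhere in the
    suite."""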
    run_cell_method = MagicMock()
    exception = LivyUnexpectedStatusException('WOW')
    run_cell_method.side_effect = [(True, ""), exception]
    spark_controller.run_command = run_cell_method

    command = "-s"
    name = "sessions_name"
    context = "-c"
    context_name = "spark"
    meth = "-m"
    method_name = "sample"
    output = "-o"
    output_var = "var_name"
    line = " ".join([
        command, name, context, context_name, meth, method_name, output,
        output_var
    ])
    cell = "cell code"

    result = magic.spark(line, cell)

    run_cell_method.assert_any_call(Command(cell), name)
    run_cell_method.assert_any_call(
        SparkStoreCommand(output_var, samplemethod=method_name), name)
    ipython_display.write.assert_called_once_with("")
    ipython_display.send_error.assert_called_once_with(
        EXPECTED_ERROR_MSG.format(exception))
def test_run_spark_with_store_correct_calls():
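    """The spark magic with -o and --coerce should run the cell code and then
    issue a SparkStoreCommand with the parsed sample method and coerce flag.
    Relies on the module-level magic and spark_controller test doubles set up
    elsewhere in the suite."""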
    run_cell_method = MagicMock()
    run_cell_method.return_value = (True, "")
    spark_controller.run_command = run_cell_method

    command = "-s"
    name = "sessions_name"
    context = "-c"
    context_name = "spark"
    meth = "-m"
    method_name = "sample"
    output = "-o"
    output_var = "var_name"
    coer = "--coerce"
    coerce_value = "True"
    line = " ".join([
        command, name, context, context_name, meth, method_name, output,
        output_var, coer, coerce_value
    ])
    cell = "cell code"

    result = magic.spark(line, cell)

    run_cell_method.assert_any_call(Command(cell), name)
    run_cell_method.assert_any_call(
        SparkStoreCommand(output_var, samplemethod=method_name, coerce=True),
        name)
def test_sparkstorecommand_initializes():
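    """The constructor should store samplemethod, maxrows and samplefraction as given."""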
    variable_name = "var_name"
    samplemethod = "take"
    maxrows = 120
    samplefraction = 0.6
    sparkcommand = SparkStoreCommand(variable_name, samplemethod, maxrows, samplefraction)
    assert_equals(sparkcommand.samplemethod, samplemethod)
    assert_equals(sparkcommand.maxrows, maxrows)
    assert_equals(sparkcommand.samplefraction, samplefraction)
def test_sparkstorecommand_loads_defaults():
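    """With no explicit arguments, the sampling options should fall back to the
    configured defaults."""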
    defaults = {
        conf.default_samplemethod.__name__: "sample",
        conf.default_maxrows.__name__: 419,
        conf.default_samplefraction.__name__: 0.99,
    }
    conf.override_all(defaults)
    variable_name = "var_name"
    sparkcommand = SparkStoreCommand(variable_name)
    assert_equals(sparkcommand.samplemethod, defaults[conf.default_samplemethod.__name__])
    assert_equals(sparkcommand.maxrows, defaults[conf.default_maxrows.__name__])
    assert_equals(sparkcommand.samplefraction, defaults[conf.default_samplefraction.__name__])
def test_pyspark_livy_sampling_options():
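    """_pyspark_command should translate samplemethod, maxrows and samplefraction
    into the matching toJSON()/sample()/take()/collect() chain."""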
    variable_name = "var_name"

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='take', maxrows=120)
    assert_equals(sparkcommand._pyspark_command(variable_name),
                  Command(u'import sys\nfor {} in {}.toJSON(use_unicode=(sys.version_info.major > 2)).take(120): print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                  LONG_RANDOM_VARIABLE_NAME)))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='take', maxrows=-1)
    assert_equals(sparkcommand._pyspark_command(variable_name),
                  Command(u'import sys\nfor {} in {}.toJSON(use_unicode=(sys.version_info.major > 2)).collect(): print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                  LONG_RANDOM_VARIABLE_NAME)))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='sample', samplefraction=0.25, maxrows=-1)
    assert_equals(sparkcommand._pyspark_command(variable_name),
                  Command(u'import sys\nfor {} in {}.toJSON(use_unicode=(sys.version_info.major > 2)).sample(False, 0.25).collect(): '
                          u'print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                  LONG_RANDOM_VARIABLE_NAME)))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    assert_equals(sparkcommand._pyspark_command(variable_name),
                  Command(u'import sys\nfor {} in {}.toJSON(use_unicode=(sys.version_info.major > 2)).sample(False, 0.33).take(3234): '
                          u'print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                  LONG_RANDOM_VARIABLE_NAME)))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    assert_equals(sparkcommand._pyspark_command(variable_name),
                  Command(u'import sys\nfor {} in {}.toJSON(use_unicode=(sys.version_info.major > 2)).sample(False, 0.33).take(3234): '
                          u'print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name, LONG_RANDOM_VARIABLE_NAME)))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod=None, maxrows=100)
    assert_equals(sparkcommand._pyspark_command(variable_name),
                  Command(u'import sys\nfor {} in {}.toJSON(use_unicode=(sys.version_info.major > 2)).take(100): print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                  LONG_RANDOM_VARIABLE_NAME)))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod=None, maxrows=100)
    assert_equals(sparkcommand._pyspark_command(variable_name),
                  Command(u'import sys\nfor {} in {}.toJSON(use_unicode=(sys.version_info.major > 2)).take(100): print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                  LONG_RANDOM_VARIABLE_NAME)))
def test_to_command_invalid():
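    """to_command() should reject an unknown session kind with BadUserDataException."""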
    variable_name = "var_name"
    sparkcommand = SparkStoreCommand(variable_name)
    assert_raises(BadUserDataException, sparkcommand.to_command, "invalid", variable_name)
def test_to_command_r():
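    """to_command() should dispatch 'sparkr' sessions to _r_command()."""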
    variable_name = "var_name"
    sparkcommand = SparkStoreCommand(variable_name)
    sparkcommand._r_command = MagicMock(return_value=MagicMock())
    sparkcommand.to_command("sparkr", variable_name)
    sparkcommand._r_command.assert_called_with(variable_name)
def test_r_livy_sampling_options():
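    """_r_command should translate the sampling options into the matching
    jsonlite::toJSON / take / collect / sample expression."""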
    variable_name = "abc"

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='take', maxrows=100)

    assert_equals(sparkcommand._r_command(variable_name),
                  Command('for ({} in (jsonlite::toJSON(take({},100)))) {{cat({})}}'.format(LONG_RANDOM_VARIABLE_NAME, variable_name, LONG_RANDOM_VARIABLE_NAME)))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='take', maxrows=-1)
    assert_equals(sparkcommand._r_command(variable_name),
                  Command('for ({} in (jsonlite::toJSON(collect({})))) {{cat({})}}'.format(LONG_RANDOM_VARIABLE_NAME, variable_name, LONG_RANDOM_VARIABLE_NAME)))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='sample', samplefraction=0.25, maxrows=-1)
    assert_equals(sparkcommand._r_command(variable_name),
                  Command('for ({} in (jsonlite::toJSON(collect(sample({}, FALSE, 0.25))))) {{cat({})}}'.format(LONG_RANDOM_VARIABLE_NAME, variable_name, LONG_RANDOM_VARIABLE_NAME)))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    assert_equals(sparkcommand._r_command(variable_name),
                  Command('for ({} in (jsonlite::toJSON(take(sample({}, FALSE, 0.33),3234)))) {{cat({})}}'.format(LONG_RANDOM_VARIABLE_NAME, variable_name, LONG_RANDOM_VARIABLE_NAME)))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod=None, maxrows=100)
    assert_equals(sparkcommand._r_command(variable_name),
                  Command('for ({} in (jsonlite::toJSON(take({},100)))) {{cat({})}}'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name, 
                                  LONG_RANDOM_VARIABLE_NAME)))
def test_scala_livy_sampling_options():
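    """_scala_command should translate the sampling options into the matching
    toJSON/take/collect/sample chain."""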
    variable_name = "abc"

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='take', maxrows=100)
    assert_equals(sparkcommand._scala_command(variable_name),
                  Command('{}.toJSON.take(100).foreach(println)'.format(variable_name)))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='take', maxrows=-1)
    assert_equals(sparkcommand._scala_command(variable_name),
                  Command('{}.toJSON.collect.foreach(println)'.format(variable_name)))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='sample', samplefraction=0.25, maxrows=-1)
    assert_equals(sparkcommand._scala_command(variable_name),
                  Command('{}.toJSON.sample(false, 0.25).collect.foreach(println)'.format(variable_name)))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    assert_equals(sparkcommand._scala_command(variable_name),
                  Command('{}.toJSON.sample(false, 0.33).take(3234).foreach(println)'.format(variable_name)))
    
    sparkcommand = SparkStoreCommand(variable_name, samplemethod=None, maxrows=100)
    assert_equals(sparkcommand._scala_command(variable_name),
                  Command('{}.toJSON.take(100).foreach(println)'.format(variable_name)))
def test_pyspark_livy_sampling_options_with_encoding():
    """Variant of the PySpark sampling test for the older command format: each
    JSON row is encoded with conf.pyspark_dataframe_encoding() unless encoding
    is disabled via _pyspark_command's second argument."""
    variable_name = "var_name"

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='take', maxrows=120)
    assert_equals(sparkcommand._pyspark_command(variable_name),
                  Command(u'for {} in {}.toJSON().take(120): print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_dataframe_encoding())))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='take', maxrows=-1)
    assert_equals(sparkcommand._pyspark_command(variable_name),
                  Command(u'for {} in {}.toJSON().collect(): print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_dataframe_encoding())))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='sample', samplefraction=0.25, maxrows=-1)
    assert_equals(sparkcommand._pyspark_command(variable_name),
                  Command(u'for {} in {}.toJSON().sample(False, 0.25).collect(): '
                          u'print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_dataframe_encoding())))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    assert_equals(sparkcommand._pyspark_command(variable_name),
                  Command(u'for {} in {}.toJSON().sample(False, 0.33).take(3234): '
                          u'print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_dataframe_encoding())))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    assert_equals(sparkcommand._pyspark_command(variable_name, False),
                  Command(u'for {} in {}.toJSON().sample(False, 0.33).take(3234): '
                          u'print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name, LONG_RANDOM_VARIABLE_NAME)))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod=None, maxrows=100)
    assert_equals(sparkcommand._pyspark_command(variable_name),
                  Command(u'for {} in {}.toJSON().take(100): print({}.encode("{}"))'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                  LONG_RANDOM_VARIABLE_NAME, conf.pyspark_dataframe_encoding())))

    sparkcommand = SparkStoreCommand(variable_name, samplemethod=None, maxrows=100)
    assert_equals(sparkcommand._pyspark_command(variable_name, False),
                  Command(u'for {} in {}.toJSON().take(100): print({})'\
                          .format(LONG_RANDOM_VARIABLE_NAME, variable_name,
                                  LONG_RANDOM_VARIABLE_NAME)))
def test_to_command_pyspark3():
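    """to_command() should dispatch 'pyspark3' sessions to
    _pyspark_command(variable_name, False), i.e. with row encoding disabled."""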
    variable_name = "var_name"
    sparkcommand = SparkStoreCommand(variable_name)
    sparkcommand._pyspark_command = MagicMock(return_value=MagicMock())
    sparkcommand.to_command("pyspark3", variable_name)
    sparkcommand._pyspark_command.assert_called_with(variable_name, False)
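

# Illustrative only: a minimal sketch of the kind-based dispatch that the
# to_command tests above exercise. This is not the sparkmagic implementation;
# the method names and arguments follow what the individual tests assert, and
# the real dispatch differs between sparkmagic versions.
def _to_command_dispatch_sketch(store_command, session_kind, output_var):
    if session_kind == "pyspark":
        return store_command._pyspark_command(output_var)
    elif session_kind == "pyspark3":
        # pyspark3 disables per-row encoding, as test_to_command_pyspark3 asserts.
        return store_command._pyspark_command(output_var, False)
    elif session_kind == "spark":
        # 'spark' is the Scala kind; presumably routed to _scala_command.
        return store_command._scala_command(output_var)
    elif session_kind == "sparkr":
        return store_command._r_command(output_var)
    else:
        # Unknown kinds are user errors, as test_to_command_invalid asserts.
        raise BadUserDataException("Kind '{}' is not supported.".format(session_kind))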