Пример #1
0
def test_execute_sql_no_results():
    """Check that empty command output produces an empty DataFrame.

    The mocked command reports success with an empty string. The test then
    verifies the returned frame, the single ``execute`` call on the command,
    and the arguments of the start and end events sent to ``spark_events``.
    """
    global executed_once
    executed_once = False

    events = MagicMock()
    sqlquery = SQLQuery("SHOW TABLES", "take", maxrows=-1, spark_events=events)
    sqlquery.to_command = MagicMock()
    sqlquery.to_only_columns_query = MagicMock()
    command = sqlquery.to_command.return_value
    empty_output = ""
    expected_frame = pd.DataFrame([])
    session = MagicMock()
    command.execute.return_value = (True, empty_output)
    session.kind = "spark"

    actual_frame = sqlquery.execute(session)

    assert_frame_equal(actual_frame, expected_frame)
    command.execute.assert_called_once_with(session)

    # The start event carries the session/query identifiers plus sampling options.
    start_args = (
        session.guid,
        session.kind,
        session.id,
        sqlquery.guid,
        sqlquery.samplemethod,
        sqlquery.maxrows,
        sqlquery.samplefraction,
    )
    events.emit_sql_execution_start_event.assert_called_once_with(*start_args)

    # The end event reports success with empty exception type and message.
    end_args = (session.guid, session.kind, session.id, sqlquery.guid, command.guid, True, "", "")
    events.emit_sql_execution_end_event.assert_called_once_with(*end_args)
Пример #2
0
def test_execute_sql():
    """Check that JSON-lines command output is returned as a DataFrame.

    The mocked command returns two JSON records (one per line). The test
    compares the resulting frame with the expected one and verifies the
    arguments of the start and end events sent to ``spark_events``.
    """
    events = MagicMock()
    sqlquery = SQLQuery("HERE IS THE QUERY", "take", 100, 0.2, spark_events=events)

    # Two JSON documents separated by a newline, as produced by the command.
    raw_output = '{"z":100, "nullv":null, "y":50}\n{"z":25, "nullv":null, "y":10}'
    command = MagicMock()
    command.execute = MagicMock(return_value=(True, raw_output))
    sqlquery.to_command = MagicMock(return_value=command)

    rows = [
        {'z': 100, "nullv": None, 'y': 50},
        {'z': 25, "nullv": None, 'y': 10},
    ]
    expected_frame = pd.DataFrame(rows, columns=['z', "nullv", 'y'])

    session = MagicMock()
    session.kind = "pyspark"

    actual_frame = sqlquery.execute(session)

    assert_frame_equal(actual_frame, expected_frame)
    command.execute.assert_called_once_with(session)

    start_args = (session.guid, session.kind, session.id, sqlquery.guid, 'take', 100, 0.2)
    events.emit_sql_execution_start_event.assert_called_once_with(*start_args)

    end_args = (session.guid, session.kind, session.id, sqlquery.guid, command.guid, True, '', '')
    events.emit_sql_execution_end_event.assert_called_once_with(*end_args)
Пример #3
0
def test_pyspark_livy_sql_options():
    """Check the PySpark command text produced for each sampling configuration.

    Each case builds an ``SQLQuery`` with different sampling options and
    compares ``_pyspark_command("sqlContext")`` with the expected ``Command``.
    A final case calls ``_pyspark_command("spark", False)`` and expects a
    plain ``print`` without the ``.encode(...)`` call.
    """
    query = "abc"
    var = LONG_RANDOM_VARIABLE_NAME

    # Every expected command shares the same head and tail; only the
    # sampling/collection chain in the middle differs between cases.
    head = u'for {} in sqlContext.sql(u"""{} """).toJSON()'
    tail = u': print({}.encode("{}"))'
    cases = [
        (dict(samplemethod='take', maxrows=120), u'.take(120)'),
        (dict(samplemethod='take', maxrows=-1), u'.collect()'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         u'.sample(False, 0.25).collect()'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         u'.sample(False, 0.33).take(3234)'),
    ]
    for options, chain in cases:
        sqlquery = SQLQuery(query, **options)
        actual = sqlquery._pyspark_command("sqlContext")
        template = head + chain + tail
        expected = Command(template.format(var, query, var, conf.pyspark_sql_encoding()))
        assert_equals(actual, expected)

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    actual = sqlquery._pyspark_command("spark", False)
    plain_template = u'for {} in spark.sql(u"""{} """).toJSON().sample(False, 0.33).take(3234): print({})'
    assert_equals(actual, Command(plain_template.format(var, query, var)))
Пример #4
0
def test_execute_sql_no_results():
    """Check that empty plain-text command output produces an empty DataFrame.

    The mocked command returns ``(True, "", MIMETYPE_TEXT_PLAIN)``. The test
    verifies the returned frame, the single ``execute`` call on the command,
    and the arguments of the start and end events sent to ``spark_events``.
    """
    global executed_once
    executed_once = False

    events = MagicMock()
    sqlquery = SQLQuery("SHOW TABLES", "take", maxrows=-1, spark_events=events)
    sqlquery.to_command = MagicMock()
    sqlquery.to_only_columns_query = MagicMock()
    command = sqlquery.to_command.return_value
    empty_output = ""
    expected_frame = pd.DataFrame([])
    session = MagicMock()
    command.execute.return_value = (True, empty_output, MIMETYPE_TEXT_PLAIN)
    session.kind = "spark"

    actual_frame = sqlquery.execute(session)

    assert_frame_equal(actual_frame, expected_frame)
    command.execute.assert_called_once_with(session)

    start_args = (
        session.guid,
        session.kind,
        session.id,
        sqlquery.guid,
        sqlquery.samplemethod,
        sqlquery.maxrows,
        sqlquery.samplefraction,
    )
    events.emit_sql_execution_start_event.assert_called_once_with(*start_args)

    end_args = (session.guid, session.kind, session.id, sqlquery.guid, command.guid, True, "", "")
    events.emit_sql_execution_end_event.assert_called_once_with(*end_args)
Пример #5
0
def test_unicode_sql():
    """Check that a query containing a non-ASCII character is embedded verbatim.

    Compares the PySpark, Scala and R commands generated for the ``"spark"``
    context with the expected ``Command`` objects.
    """
    query = u"SELECT 'è'"
    var = LONG_RANDOM_VARIABLE_NAME
    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)

    pyspark_actual = sqlquery._pyspark_command("spark")
    pyspark_template = u'for {} in spark.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'
    pyspark_expected = Command(
        pyspark_template.format(var, query, var, conf.pyspark_dataframe_encoding()))
    assert_equals(pyspark_actual, pyspark_expected)

    scala_actual = sqlquery._scala_command("spark")
    scala_template = u'spark.sql("""{}""").toJSON.take(120).foreach(println)'
    assert_equals(scala_actual, Command(scala_template.format(query)))

    r_actual = sqlquery._r_command("spark")
    # Doubled braces are literal braces in the generated R code.
    r_template = u'for ({} in (jsonlite:::toJSON(take(sql("{}"),120)))) {{cat({})}}'
    assert_equals(r_actual, Command(r_template.format(var, query, var)))
Пример #6
0
def test_scala_livy_sql_options():
    """Check the Scala command text produced for each sampling configuration.

    Each case builds an ``SQLQuery`` with different sampling options and
    compares ``_scala_command("sqlContext")`` with the expected ``Command``.
    """
    query = "abc"
    cases = [
        (dict(samplemethod="take", maxrows=100),
         'sqlContext.sql("""{}""").toJSON.take(100).foreach(println)'),
        (dict(samplemethod="take", maxrows=-1),
         'sqlContext.sql("""{}""").toJSON.collect.foreach(println)'),
        (dict(samplemethod="sample", samplefraction=0.25, maxrows=-1),
         'sqlContext.sql("""{}""").toJSON.sample(false, 0.25).collect.foreach(println)'),
        (dict(samplemethod="sample", samplefraction=0.33, maxrows=3234),
         'sqlContext.sql("""{}""").toJSON.sample(false, 0.33).take(3234).foreach(println)'),
    ]
    for options, template in cases:
        sqlquery = SQLQuery(query, **options)
        actual = sqlquery._scala_command("sqlContext")
        assert_equals(actual, Command(template.format(query)))
Пример #7
0
def test_unicode_sql():
    """Check that a query containing a non-ASCII character is embedded verbatim.

    Compares the PySpark and Scala commands with the expected ``Command``
    objects, then requires ``_r_command()`` to raise ``NotImplementedError``.
    """
    query = u"SELECT 'è'"
    var = LONG_RANDOM_VARIABLE_NAME
    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)

    pyspark_actual = sqlquery._pyspark_command()
    pyspark_template = u'for {} in sqlContext.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'
    pyspark_expected = Command(
        pyspark_template.format(var, query, var, conf.pyspark_sql_encoding()))
    assert_equals(pyspark_actual, pyspark_expected)

    scala_actual = sqlquery._scala_command()
    scala_template = u'sqlContext.sql("""{}""").toJSON.take(120).foreach(println)'
    assert_equals(scala_actual, Command(scala_template.format(query)))

    try:
        sqlquery._r_command()
    except NotImplementedError:
        pass
    else:
        # Reaching this point means no exception was raised.
        assert False
Пример #8
0
def test_execute_sql_failure_emits_event():
    """Check that a failing command still emits an end event describing the error.

    The mocked command's ``execute`` raises ``ValueError('yo')``. The test
    requires the exception to propagate out of ``SQLQuery.execute`` and the
    end event to be emitted once with ``False``, ``'ValueError'`` and ``'yo'``.
    """
    events = MagicMock()
    sqlquery = SQLQuery("HERE IS THE QUERY", "take", 100, 0.2, events)
    sqlquery.to_command = MagicMock()
    command = sqlquery.to_command.return_value
    command.execute = MagicMock(side_effect=ValueError('yo'))

    session = MagicMock()
    session.kind = "pyspark"

    try:
        sqlquery.execute(session)
    except ValueError:
        command.execute.assert_called_once_with(session)
        end_args = (
            session.guid, session.kind, session.id, sqlquery.guid,
            command.guid, False, 'ValueError', 'yo',
        )
        events.emit_sql_execution_end_event.assert_called_once_with(*end_args)
    else:
        # execute() returned normally, so the error was swallowed.
        assert False
Пример #9
0
def test_execute_sql_failure_emits_event():
    """Verify the end event emitted when the underlying command raises.

    ``execute`` on the mocked command raises ``ValueError('yo')``; the
    exception must reach the caller, and ``emit_sql_execution_end_event`` must
    have been called exactly once with a failure flag, the exception class
    name and its message.
    """
    spark_events = MagicMock()
    failing_execute = MagicMock(side_effect=ValueError('yo'))

    sqlquery = SQLQuery("HERE IS THE QUERY", "take", 100, 0.2, spark_events)
    sqlquery.to_command = MagicMock()
    sqlquery.to_command.return_value.execute = failing_execute

    session = MagicMock()
    session.kind = "pyspark"

    raised = False
    try:
        sqlquery.execute(session)
    except ValueError:
        raised = True
        failing_execute.assert_called_once_with(session)
        spark_events.emit_sql_execution_end_event.assert_called_once_with(
            session.guid,
            session.kind,
            session.id,
            sqlquery.guid,
            sqlquery.to_command.return_value.guid,
            False,
            'ValueError',
            'yo',
        )
    # A normal return from execute() is a test failure.
    assert raised
Пример #10
0
 def switch_user_database(self, session_name):
     """Run a ``use <database>`` query on the given session when configured.

     The query is only issued when ``conf.switch_to_user_database()`` is
     truthy and ``conf.user_database()`` returns a non-empty value; otherwise
     the method does nothing.

     :param session_name: forwarded unchanged to ``self.run_sqlquery``.
     """
     from sparkmagic.livyclientlib.sqlquery import SQLQuery
     database = conf.user_database()
     if not (conf.switch_to_user_database() and database):
         return
     use_statement = "use {}".format(database)
     self.run_sqlquery(SQLQuery(use_statement), session_name)
     self.logger.debug("Switch user database: %s" % database)
Пример #11
0
def test_sqlquery_initializes():
    """Check that positional constructor arguments are stored on the SQLQuery."""
    expected = {
        "query": "HERE IS MY SQL QUERY SELECT * FROM CREATE DROP TABLE",
        "samplemethod": "take",
        "maxrows": 120,
        "samplefraction": 0.6,
    }
    sqlquery = SQLQuery(
        expected["query"],
        expected["samplemethod"],
        expected["maxrows"],
        expected["samplefraction"],
    )
    # Checked in the same order as the constructor arguments.
    for attribute in ("query", "samplemethod", "maxrows", "samplefraction"):
        assert_equals(getattr(sqlquery, attribute), expected[attribute])
Пример #12
0
def test_unicode_sql():
    """Check that a query containing a non-ASCII character is embedded verbatim.

    Compares the PySpark, Scala and R commands generated for the ``"spark"``
    context with the expected ``Command`` objects.
    """
    query = u"SELECT 'è'"
    var = LONG_RANDOM_VARIABLE_NAME
    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)

    pyspark_actual = sqlquery._pyspark_command("spark")
    pyspark_template = (
        u'import sys\n'
        u'for {} in spark.sql(u"""{} """)'
        u'.toJSON(use_unicode=(sys.version_info.major > 2)).take(120): print({})'
    )
    assert_equals(pyspark_actual, Command(pyspark_template.format(var, query, var)))

    scala_actual = sqlquery._scala_command("spark")
    scala_template = u'spark.sql("""{}""").toJSON.take(120).foreach(println)'
    assert_equals(scala_actual, Command(scala_template.format(query)))

    r_actual = sqlquery._r_command("spark")
    # Doubled braces are literal braces in the generated R code.
    r_template = u'for ({} in (jsonlite:::toJSON(take(sql("{}"),120)))) {{cat({})}}'
    assert_equals(r_actual, Command(r_template.format(var, query, var)))
Пример #13
0
def test_sql_df_execution_without_output_var():
    """Check ``execute_sqlquery`` with no output variable and quiet disabled.

    The value returned by the mocked ``run_sqlquery`` must be returned to the
    caller, and nothing may be stored in ``shell.user_ns``.
    """
    expected_df = 0
    expected_query = SQLQuery("")
    run_sqlquery = MagicMock(return_value=expected_df)
    magic.spark_controller.run_sqlquery = run_sqlquery

    # Arguments: cell, three sampling options, session, output_var=None, quiet=False.
    returned = magic.execute_sqlquery("", None, None, None, session, None, False)

    magic.spark_controller.run_sqlquery.assert_called_once_with(expected_query, session)
    assert returned == expected_df
    assert_equals(list(shell.user_ns.keys()), [])
Пример #14
0
def test_execute_sql():
    """Check that JSON-lines command output is returned as a DataFrame.

    The mocked command returns two JSON records (one per line). The test
    compares the resulting frame with the expected one, checks that the
    command was executed once with the session, and verifies the start and
    end events sent to ``spark_events``.
    """
    spark_events = MagicMock()
    sqlquery = SQLQuery("HERE IS THE QUERY", "take", 100, 0.2, spark_events=spark_events)
    sqlquery.to_command = MagicMock(return_value=MagicMock())
    command = sqlquery.to_command.return_value

    # Two JSON documents separated by a newline, as produced by the command.
    raw_output = '{"z":100, "nullv":null, "y":50}\n{"z":25, "nullv":null, "y":10}'
    command.execute = MagicMock(return_value=(True, raw_output))
    expected_frame = pd.DataFrame(
        [{'z': 100, "nullv": None, 'y': 50}, {'z': 25, "nullv": None, 'y': 10}],
        columns=['z', "nullv", 'y'],
    )

    session = MagicMock()
    session.kind = "pyspark"

    actual_frame = sqlquery.execute(session)

    assert_frame_equal(actual_frame, expected_frame)
    command.execute.assert_called_once_with(session)

    # Use the real assertion method: an underscore-prefixed name such as
    # ``_assert_called_once_with`` is just an auto-created child mock and
    # never fails, which would leave the event emission unchecked.
    # NOTE(review): the start-event argument list follows the other
    # test_execute_sql variant in this file (identifiers plus sampling
    # options) -- confirm against the SQLQuery version under test.
    spark_events.emit_sql_execution_start_event.assert_called_once_with(
        session.guid, session.kind, session.id, sqlquery.guid, 'take', 100, 0.2)
    spark_events.emit_sql_execution_end_event.assert_called_once_with(
        session.guid, session.kind, session.id, sqlquery.guid, command.guid, True, "", "")
Пример #15
0
def test_sql_df_execution_quiet_with_output_var():
    """Check ``execute_sqlquery`` with an output variable and quiet enabled.

    Nothing may be returned to the caller, and the value returned by the
    mocked ``run_sqlquery`` must be stored in ``shell.user_ns`` under the
    output variable name.
    """
    expected_df = 0
    expected_query = SQLQuery("")
    target_name = "var_name"

    magic.spark_controller = MagicMock()
    magic.spark_controller.run_sqlquery = MagicMock(return_value=expected_df)

    # Arguments: cell, three sampling options, session, output_var, quiet=True.
    returned = magic.execute_sqlquery("", None, None, None, session, target_name, True)

    magic.spark_controller.run_sqlquery.assert_called_once_with(expected_query, session)
    assert returned is None
    assert shell.user_ns[target_name] == expected_df
Пример #16
0
def test_sqlquery_loads_defaults():
    """Check that an SQLQuery built with only a query picks up configured defaults.

    The three default-sampling settings are overridden through
    ``conf.override_all`` and then compared with the attributes of a freshly
    constructed ``SQLQuery``.
    """
    # Configuration keys are the names of the corresponding conf accessors.
    method_key = conf.default_samplemethod.__name__
    maxrows_key = conf.default_maxrows.__name__
    fraction_key = conf.default_samplefraction.__name__
    overrides = {method_key: "sample", maxrows_key: 419, fraction_key: 0.99}
    conf.override_all(overrides)

    query = "DROP TABLE USERS;"
    sqlquery = SQLQuery(query)

    assert_equals(sqlquery.query, query)
    assert_equals(sqlquery.samplemethod, overrides[method_key])
    assert_equals(sqlquery.maxrows, overrides[maxrows_key])
    assert_equals(sqlquery.samplefraction, overrides[fraction_key])
Пример #17
0
def test_unicode_sql():
    """Check that a query containing a non-ASCII character is embedded verbatim.

    Compares the PySpark and Scala commands for the ``"spark"`` context and
    the R command (called without arguments) with the expected ``Command``
    objects.
    """
    query = u"SELECT 'è'"
    var = LONG_RANDOM_VARIABLE_NAME
    sqlquery = SQLQuery(query, samplemethod="take", maxrows=120)

    pyspark_actual = sqlquery._pyspark_command("spark")
    pyspark_template = u'for {} in spark.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'
    pyspark_expected = Command(
        pyspark_template.format(var, query, var, conf.pyspark_sql_encoding()))
    assert_equals(pyspark_actual, pyspark_expected)

    scala_actual = sqlquery._scala_command("spark")
    scala_template = u'spark.sql("""{}""").toJSON.take(120).foreach(println)'
    assert_equals(scala_actual, Command(scala_template.format(query)))

    r_actual = sqlquery._r_command()
    # Doubled braces are literal braces in the generated R code.
    r_template = u'for ({} in (jsonlite:::toJSON(take(sql("{}"),120)))) {{cat({})}}'
    assert_equals(r_actual, Command(r_template.format(var, query, var)))
Пример #18
0
def test_r_livy_sql_options_spark():
    """Check the R command text produced for each sampling configuration.

    Each case builds an ``SQLQuery`` with different sampling options and
    compares ``_r_command()`` with the expected ``Command``.
    """
    query = "abc"
    var = LONG_RANDOM_VARIABLE_NAME
    # Doubled braces in the templates are literal braces in the R code.
    cases = [
        (dict(samplemethod="take", maxrows=100),
         'for ({} in (jsonlite:::toJSON(take(sql("{}"),100)))) {{cat({})}}'),
        (dict(samplemethod="take", maxrows=-1),
         'for ({} in (jsonlite:::toJSON(collect(sql("{}"))))) {{cat({})}}'),
        (dict(samplemethod="sample", samplefraction=0.25, maxrows=-1),
         'for ({} in (jsonlite:::toJSON(collect(sample(sql("{}"), FALSE, 0.25))))) {{cat({})}}'),
        (dict(samplemethod="sample", samplefraction=0.33, maxrows=3234),
         'for ({} in (jsonlite:::toJSON(take(sample(sql("{}"), FALSE, 0.33),3234)))) {{cat({})}}'),
    ]
    for options, template in cases:
        sqlquery = SQLQuery(query, **options)
        actual = sqlquery._r_command()
        assert_equals(actual, Command(template.format(var, query, var)))
Пример #19
0
def test_run_sql_command_parses():
    """Check that the ``spark`` magic line is parsed into a ``run_sqlquery`` call.

    The line selects session ``sessions_name``, context ``sql`` and sample
    method ``sample``; the mocked ``run_sqlquery`` must receive the matching
    ``SQLQuery`` and session name, and the magic must return a value.
    """
    run_cell_method = MagicMock(return_value=(True, "", MIMETYPE_TEXT_PLAIN))
    spark_controller.run_sqlquery = run_cell_method

    name = "sessions_name"
    method_name = "sample"
    # Flags: -s <session name>, -c <context>, -m <sample method>.
    line = "-s {} -c sql -m {}".format(name, method_name)
    cell = "cell code"

    outcome = magic.spark(line, cell)

    expected_query = SQLQuery(cell, samplemethod=method_name)
    run_cell_method.assert_called_once_with(expected_query, name)
    assert outcome is not None
Пример #20
0
def test_run_sql_command_knows_how_to_be_quiet():
    """Check that the ``-q`` flag makes the ``spark`` magic return ``None``.

    The mocked ``run_sqlquery`` must still receive the ``SQLQuery`` built
    from the cell and sample method, together with the session name.
    """
    run_cell_method = MagicMock(return_value=(True, ""))
    spark_controller.run_sqlquery = run_cell_method

    name = "sessions_name"
    method_name = "sample"
    # Flags: -s <session name>, -c <context>, -q, -m <sample method>.
    line = "-s {} -c sql -q -m {}".format(name, method_name)
    cell = "cell code"

    outcome = magic.spark(line, cell)

    expected_query = SQLQuery(cell, samplemethod=method_name)
    run_cell_method.assert_called_once_with(expected_query, name)
    assert outcome is None
Пример #21
0
def test_run_sql_command_exception():
    """Check that an exception from ``run_sqlquery`` is sent to the display.

    The mocked ``run_sqlquery`` raises ``LivyUnexpectedStatusException('WOW')``;
    ``ipython_display.send_error`` must be called once with
    ``EXPECTED_ERROR_MSG`` formatted with that exception.
    """
    failure = LivyUnexpectedStatusException('WOW')
    run_cell_method = MagicMock(side_effect=failure)
    spark_controller.run_sqlquery = run_cell_method

    name = "sessions_name"
    method_name = "sample"
    # Flags: -s <session name>, -c <context>, -m <sample method>.
    line = "-s {} -c sql -m {}".format(name, method_name)
    cell = "cell code"

    magic.spark(line, cell)

    expected_query = SQLQuery(cell, samplemethod=method_name)
    run_cell_method.assert_called_once_with(expected_query, name)
    expected_message = EXPECTED_ERROR_MSG.format(run_cell_method.side_effect)
    ipython_display.send_error.assert_called_once_with(expected_message)
Пример #22
0
def test_df_execution_with_output_var():
    """Check ``execute_sqlquery`` with an output variable and quiet disabled.

    The value returned by the mocked ``run_sqlquery`` must be both returned
    to the caller and stored in ``shell.user_ns`` under the output variable
    name.
    """
    shell = MagicMock()
    shell.user_ns = {}
    magic = SparkMagicBase(None)
    magic.shell = shell

    expected_df = 0
    expected_query = SQLQuery("")
    session = MagicMock()
    target_name = "var_name"

    magic.spark_controller = MagicMock()
    magic.spark_controller.run_sqlquery = MagicMock(return_value=expected_df)

    # Arguments: cell, three sampling options, session, output_var, quiet=False.
    returned = magic.execute_sqlquery("", None, None, None, session, target_name, False)

    magic.spark_controller.run_sqlquery.assert_called_once_with(expected_query, session)
    assert returned == expected_df
    assert shell.user_ns[target_name] == expected_df
Пример #23
0
def test_df_execution_quiet_without_output_var():
    """Check ``execute_sqlquery`` with no output variable and quiet enabled.

    Nothing may be returned to the caller and nothing may be stored in
    ``shell.user_ns``.
    """
    shell = MagicMock()
    shell.user_ns = {}
    magic = SparkMagicBase(None)
    magic.shell = shell

    expected_df = 0
    expected_query = SQLQuery("")
    session = MagicMock()

    magic.spark_controller = MagicMock()
    magic.spark_controller.run_sqlquery = MagicMock(return_value=expected_df)

    # Arguments: cell, three sampling options, session, output_var=None, quiet=True.
    returned = magic.execute_sqlquery("", None, None, None, session, None, True)

    magic.spark_controller.run_sqlquery.assert_called_once_with(expected_query, session)
    assert returned is None
    assert_equals(list(shell.user_ns.keys()), [])
Пример #24
0
def test_unicode_sql():
    """Verify command generation for a query with a non-ASCII character.

    The PySpark and Scala commands are compared with the expected ``Command``
    objects; ``_r_command()`` is required to raise ``NotImplementedError``.
    """
    query = u"SELECT 'è'"
    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)

    actual = sqlquery._pyspark_command()
    expected_text = u'for {} in sqlContext.sql(u"""{} """).toJSON().take(120): print({}.encode("{}"))'.format(
        LONG_RANDOM_VARIABLE_NAME,
        query,
        LONG_RANDOM_VARIABLE_NAME,
        conf.pyspark_sql_encoding(),
    )
    assert_equals(actual, Command(expected_text))

    actual = sqlquery._scala_command()
    expected_text = u'sqlContext.sql("""{}""").toJSON.take(120).foreach(println)'.format(query)
    assert_equals(actual, Command(expected_text))

    try:
        sqlquery._r_command()
    except NotImplementedError:
        pass
    else:
        # Reaching this point means no exception was raised.
        assert False
Пример #25
0
 def _sqlquery(cell, samplemethod, maxrows, samplefraction):
     """Build an ``SQLQuery`` from the cell text and the three sampling options."""
     query = SQLQuery(cell, samplemethod, maxrows, samplefraction)
     return query
Пример #26
0
def test_scala_livy_sql_options():
    """Check the Scala command text produced for each sampling configuration.

    Each case builds an ``SQLQuery`` with different sampling options and
    compares ``_scala_command()`` with the expected ``Command``.
    """
    query = "abc"
    cases = [
        (dict(samplemethod='take', maxrows=100),
         'sqlContext.sql("""{}""").toJSON.take(100).foreach(println)'),
        (dict(samplemethod='take', maxrows=-1),
         'sqlContext.sql("""{}""").toJSON.collect.foreach(println)'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         'sqlContext.sql("""{}""").toJSON.sample(false, 0.25).collect.foreach(println)'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         'sqlContext.sql("""{}""").toJSON.sample(false, 0.33).take(3234).foreach(println)'),
    ]
    for options, template in cases:
        sqlquery = SQLQuery(query, **options)
        actual = sqlquery._scala_command()
        assert_equals(actual, Command(template.format(query)))
Пример #27
0
def test_pyspark_livy_sql_options():
    """Check the PySpark command text produced for each sampling configuration.

    Each case builds an ``SQLQuery`` with different sampling options and
    compares ``_pyspark_command()`` with the expected ``Command``.
    """
    query = "abc"
    var = LONG_RANDOM_VARIABLE_NAME

    # Every expected command shares the same head and tail; only the
    # sampling/collection chain in the middle differs between cases.
    head = u'for {} in sqlContext.sql(u"""{} """).toJSON()'
    tail = u': print({}.encode("{}"))'
    cases = [
        (dict(samplemethod='take', maxrows=120), u'.take(120)'),
        (dict(samplemethod='take', maxrows=-1), u'.collect()'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         u'.sample(False, 0.25).collect()'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         u'.sample(False, 0.33).take(3234)'),
    ]
    for options, chain in cases:
        sqlquery = SQLQuery(query, **options)
        actual = sqlquery._pyspark_command()
        template = head + chain + tail
        expected = Command(template.format(var, query, var, conf.pyspark_sql_encoding()))
        assert_equals(actual, expected)
Пример #28
0
def test_sqlquery_rejects_bad_data():
    """Construct an ``SQLQuery`` with ``"foo"`` as the sample method.

    NOTE(review): the body asserts nothing itself; presumably an
    expected-exception decorator outside this view checks that the
    constructor raises -- confirm.
    """
    SQLQuery("HERE IS MY SQL QUERY SELECT * FROM CREATE DROP TABLE", "foo")
Пример #29
0
def test_r_livy_sql_options_spark2():
    """Check the R command text produced for each sampling configuration.

    Each case builds an ``SQLQuery`` with different sampling options and
    compares ``_r_command("spark")`` with the expected ``Command``.
    """
    query = "abc"
    var = LONG_RANDOM_VARIABLE_NAME
    # Doubled braces in the templates are literal braces in the R code.
    cases = [
        (dict(samplemethod='take', maxrows=100),
         'for ({} in (jsonlite:::toJSON(take(sql("{}"),100)))) {{cat({})}}'),
        (dict(samplemethod='take', maxrows=-1),
         'for ({} in (jsonlite:::toJSON(collect(sql("{}"))))) {{cat({})}}'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         'for ({} in (jsonlite:::toJSON(collect(sample(sql("{}"), FALSE, 0.25))))) {{cat({})}}'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         'for ({} in (jsonlite:::toJSON(take(sample(sql("{}"), FALSE, 0.33),3234)))) {{cat({})}}'),
    ]
    for options, template in cases:
        sqlquery = SQLQuery(query, **options)
        actual = sqlquery._r_command("spark")
        assert_equals(actual, Command(template.format(var, query, var)))
Пример #30
0
def test_to_command_pyspark3():
    """Check that ``to_command("pyspark3", ...)`` delegates to ``_pyspark_command``.

    The stubbed ``_pyspark_command`` must have been last called with the
    variable name and ``False``.
    """
    pyspark_stub = MagicMock(return_value=MagicMock())
    sqlquery = SQLQuery("Query")
    sqlquery._pyspark_command = pyspark_stub

    sqlquery.to_command("pyspark3", "var_name")

    pyspark_stub.assert_called_with("var_name", False)
Пример #31
0
def test_to_command_pyspark3():
    """Verify the arguments ``to_command`` passes on for the ``pyspark3`` kind.

    ``_pyspark_command`` is replaced by a mock, which must have been last
    called with the given variable name followed by ``False``.
    """
    context_variable = "var_name"
    sqlquery = SQLQuery("Query")
    sqlquery._pyspark_command = MagicMock(return_value=MagicMock())

    sqlquery.to_command("pyspark3", context_variable)

    expected_args = (context_variable, False)
    sqlquery._pyspark_command.assert_called_with(*expected_args)
Пример #32
0
def test_pyspark_livy_sql_options():
    """Check the PySpark command text produced for each sampling configuration.

    Each case builds an ``SQLQuery`` with different sampling options and
    compares ``_pyspark_command("sqlContext")`` with the expected ``Command``.
    A final case repeats the last configuration for the ``"spark"`` context.
    """
    query = "abc"
    var = LONG_RANDOM_VARIABLE_NAME

    # Every expected command shares the same head and tail; only the
    # sampling/collection chain in the middle differs between cases.
    head = (u'import sys\n'
            u'for {} in sqlContext.sql(u"""{} """)'
            u'.toJSON(use_unicode=(sys.version_info.major > 2))')
    tail = u': print({})'
    cases = [
        (dict(samplemethod='take', maxrows=120), u'.take(120)'),
        (dict(samplemethod='take', maxrows=-1), u'.collect()'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         u'.sample(False, 0.25).collect()'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         u'.sample(False, 0.33).take(3234)'),
    ]
    for options, chain in cases:
        sqlquery = SQLQuery(query, **options)
        actual = sqlquery._pyspark_command("sqlContext")
        template = head + chain + tail
        assert_equals(actual, Command(template.format(var, query, var)))

    sqlquery = SQLQuery(query, samplemethod='sample', samplefraction=0.33, maxrows=3234)
    actual = sqlquery._pyspark_command("spark")
    spark_template = (u'import sys\n'
                      u'for {} in spark.sql(u"""{} """)'
                      u'.toJSON(use_unicode=(sys.version_info.major > 2))'
                      u'.sample(False, 0.33).take(3234): print({})')
    assert_equals(actual, Command(spark_template.format(var, query, var)))