Пример #1
0
def test_pyspark_livy_sql_options():
    """Check the PySpark command built for each sampling / row-limit combination.

    Each case pairs the keyword options handed to ``SQLQuery`` with the
    command template that ``_pyspark_command()`` is expected to produce.
    """
    query = "abc"

    # (SQLQuery keyword options, expected command template)
    cases = [
        (dict(samplemethod='take', maxrows=120),
         u'for {} in sqlContext.sql(u"""{}""").toJSON().take(120): print({}.encode("{}"))'),
        (dict(samplemethod='take', maxrows=-1),
         u'for {} in sqlContext.sql(u"""{}""").toJSON().collect(): print({}.encode("{}"))'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         u'for {} in sqlContext.sql(u"""{}""").toJSON().sample(False, 0.25).collect(): print({}.encode("{}"))'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         u'for {} in sqlContext.sql(u"""{}""").toJSON().sample(False, 0.33).take(3234): print({}.encode("{}"))'),
    ]

    for options, template in cases:
        sqlquery = SQLQuery(query, **options)
        actual = sqlquery._pyspark_command()
        expected_code = template.format(LONG_RANDOM_VARIABLE_NAME, query,
                                        LONG_RANDOM_VARIABLE_NAME,
                                        conf.pyspark_sql_encoding())
        assert_equals(actual, Command(expected_code))
Пример #2
0
 def _pyspark_command(self):
     """Build the PySpark code that runs ``self.query`` and prints its rows.

     The generated code calls ``sqlContext.sql(...).toJSON()``, optionally
     appends ``.sample(False, <samplefraction>)`` when the sample method is
     ``'sample'``, then fetches rows with ``.take(<maxrows>)`` when
     ``maxrows`` is non-negative or ``.collect()`` otherwise. Every fetched
     row is printed after being encoded with ``conf.pyspark_sql_encoding()``.

     Returns:
         A ``Command`` wrapping the generated code string.
     """
     rows_expr = u'sqlContext.sql(u"""{}""").toJSON()'.format(self.query)

     if self.samplemethod == u'sample':
         rows_expr = u'{}.sample(False, {})'.format(rows_expr, self.samplefraction)

     # A negative maxrows means "no limit": fetch everything with collect().
     rows_expr = (u'{}.take({})'.format(rows_expr, self.maxrows)
                  if self.maxrows >= 0
                  else u'{}.collect()'.format(rows_expr))

     # The same variable name is used as the loop target and as the printed value.
     loop_var = constants.LONG_RANDOM_VARIABLE_NAME
     code = u'for {} in {}: print({}.encode("{}"))'.format(
         loop_var, rows_expr, loop_var, conf.pyspark_sql_encoding())
     return Command(code)
Пример #3
0
def test_unicode_sql():
    """Check command generation for a query containing a non-ASCII character.

    The PySpark and Scala commands are expected to embed the query text as-is,
    and ``_r_command()`` is expected to raise ``NotImplementedError``.
    """
    query = u"SELECT 'è'"
    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)

    pyspark_template = u'for {} in sqlContext.sql(u"""{}""").toJSON().take(120): print({}.encode("{}"))'
    actual_pyspark = sqlquery._pyspark_command()
    expected_pyspark = Command(pyspark_template.format(LONG_RANDOM_VARIABLE_NAME, query,
                                                       LONG_RANDOM_VARIABLE_NAME,
                                                       conf.pyspark_sql_encoding()))
    assert_equals(actual_pyspark, expected_pyspark)

    scala_template = u'sqlContext.sql("""{}""").toJSON.take(120).foreach(println)'
    actual_scala = sqlquery._scala_command()
    assert_equals(actual_scala, Command(scala_template.format(query)))

    try:
        sqlquery._r_command()
    except NotImplementedError:
        pass
    else:
        # Reaching this branch means no exception was raised at all.
        assert False