def test_pyspark_livy_sql_options():
    """Check that _pyspark_command() renders take/collect/sample variants correctly."""
    query = "abc"

    # (SQLQuery kwargs, expected chained-call suffix) pairs covering each sampling mode.
    cases = [
        (dict(samplemethod='take', maxrows=120), u'take(120)'),
        (dict(samplemethod='take', maxrows=-1), u'collect()'),
        (dict(samplemethod='sample', samplefraction=0.25, maxrows=-1),
         u'sample(False, 0.25).collect()'),
        (dict(samplemethod='sample', samplefraction=0.33, maxrows=3234),
         u'sample(False, 0.33).take(3234)'),
    ]
    for kwargs, suffix in cases:
        sqlquery = SQLQuery(query, **kwargs)
        expected = (u'for {} in sqlContext.sql(u"""{} """).toJSON().{}: '
                    u'print({}.encode("{}"))').format(
                        LONG_RANDOM_VARIABLE_NAME, query, suffix,
                        LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())
        assert_equals(sqlquery._pyspark_command(), Command(expected))
def _pyspark_command(self):
    """Build the PySpark snippet that runs this SQL query and prints each
    result row as JSON encoded with the configured encoding.

    Returns a Command wrapping the generated code.
    """
    source = u'sqlContext.sql(u"""{} """).toJSON()'.format(self.query)
    if self.samplemethod == u'sample':
        source += u'.sample(False, {})'.format(self.samplefraction)
    # A negative maxrows means "no limit": collect everything.
    fetch = u'.take({})'.format(self.maxrows) if self.maxrows >= 0 else u'.collect()'
    loop = u'for {} in {}{}: print({}.encode("{}"))'.format(
        constants.LONG_RANDOM_VARIABLE_NAME, source, fetch,
        constants.LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())
    return Command(loop)
def _pyspark_command(self):
    """Return a Command whose PySpark code prints this query's result rows
    as JSON strings encoded with the configured encoding.
    """
    # Assemble the RDD expression piecewise, then join once.
    parts = [u'sqlContext.sql(u"""{} """).toJSON()'.format(self.query)]
    if self.samplemethod == u'sample':
        parts.append(u'.sample(False, {})'.format(self.samplefraction))
    if self.maxrows >= 0:
        parts.append(u'.take({})'.format(self.maxrows))
    else:
        # No row cap requested: pull the full result set.
        parts.append(u'.collect()')
    rdd_expr = u''.join(parts)
    var = constants.LONG_RANDOM_VARIABLE_NAME
    code = u'for {} in {}: print({}.encode("{}"))'.format(
        var, rdd_expr, var, conf.pyspark_sql_encoding())
    return Command(code)
def test_unicode_sql():
    """Non-ASCII query text must survive into the generated PySpark/Scala commands."""
    query = u"SELECT 'è'"
    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)

    expected_pyspark = (
        u'for {} in sqlContext.sql(u"""{} """).toJSON().take(120): '
        u'print({}.encode("{}"))').format(
            LONG_RANDOM_VARIABLE_NAME, query,
            LONG_RANDOM_VARIABLE_NAME, conf.pyspark_sql_encoding())
    assert_equals(sqlquery._pyspark_command(), Command(expected_pyspark))

    expected_scala = u'sqlContext.sql("""{}""").toJSON.take(120).foreach(println)'.format(query)
    assert_equals(sqlquery._scala_command(), Command(expected_scala))

    # R support is not implemented; the generator must raise, not return garbage.
    try:
        sqlquery._r_command()
        assert False
    except NotImplementedError:
        pass
def test_unicode_sql():
    """Generated commands must carry unicode query text through unmangled."""
    query = u"SELECT 'è'"
    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    var = LONG_RANDOM_VARIABLE_NAME

    pyspark_expected = (
        u'for {} in sqlContext.sql(u"""{} """).toJSON().take(120): '
        u'print({}.encode("{}"))').format(var, query, var, conf.pyspark_sql_encoding())
    assert_equals(sqlquery._pyspark_command(), Command(pyspark_expected))

    scala_expected = u'sqlContext.sql("""{}""").toJSON.take(120).foreach(println)'.format(query)
    assert_equals(sqlquery._scala_command(), Command(scala_expected))

    # _r_command is expected to raise NotImplementedError until R support lands.
    raised = False
    try:
        sqlquery._r_command()
    except NotImplementedError:
        raised = True
    assert raised
def _pyspark_command(self, sql_context_variable_name, encode_result=True):
    """Build the PySpark code that runs this query against the given SQL
    context and prints each result row as JSON.

    :param sql_context_variable_name: name of the SQL-context variable in the
        remote session (e.g. ``sqlContext`` or ``spark``).
    :param encode_result: when True, encode each printed row with the
        configured encoding. Python 3's improved unicode support makes the
        encode step unnecessary, so callers may pass False.
    """
    var = constants.LONG_RANDOM_VARIABLE_NAME
    rows = u'{}.sql(u"""{} """).toJSON()'.format(sql_context_variable_name, self.query)
    if self.samplemethod == u'sample':
        rows += u'.sample(False, {})'.format(self.samplefraction)
    # A negative maxrows means no limit: collect the full result set.
    rows += u'.take({})'.format(self.maxrows) if self.maxrows >= 0 else u'.collect()'
    printed = '{}.encode("{}")'.format(var, conf.pyspark_sql_encoding()) if encode_result else var
    return Command(u'for {} in {}: print({})'.format(var, rows, printed))
def test_unicode_sql():
    """Unicode query text must pass through the PySpark, Scala and R generators intact."""
    query = u"SELECT 'è'"
    sqlquery = SQLQuery(query, samplemethod='take', maxrows=120)
    var = LONG_RANDOM_VARIABLE_NAME

    pyspark = (u'for {} in spark.sql(u"""{} """).toJSON().take(120): '
               u'print({}.encode("{}"))').format(var, query, var,
                                                 conf.pyspark_sql_encoding())
    assert_equals(sqlquery._pyspark_command("spark"), Command(pyspark))

    scala = u'spark.sql("""{}""").toJSON.take(120).foreach(println)'.format(query)
    assert_equals(sqlquery._scala_command("spark"), Command(scala))

    r = u'for ({} in (jsonlite:::toJSON(take(sql("{}"),120)))) {{cat({})}}'.format(
        var, query, var)
    assert_equals(sqlquery._r_command(), Command(r))
def test_unicode_sql():
    """Each language backend must embed the unicode query text unchanged."""
    query = u"SELECT 'è'"
    sqlquery = SQLQuery(query, samplemethod="take", maxrows=120)
    var = LONG_RANDOM_VARIABLE_NAME

    # (actual command, expected generated code) for every backend.
    checks = [
        (sqlquery._pyspark_command("spark"),
         (u'for {} in spark.sql(u"""{} """).toJSON().take(120): '
          u'print({}.encode("{}"))').format(var, query, var,
                                            conf.pyspark_sql_encoding())),
        (sqlquery._scala_command("spark"),
         u'spark.sql("""{}""").toJSON.take(120).foreach(println)'.format(query)),
        (sqlquery._r_command(),
         u'for ({} in (jsonlite:::toJSON(take(sql("{}"),120)))) {{cat({})}}'.format(
             var, query, var)),
    ]
    for actual, expected in checks:
        assert_equals(actual, Command(expected))