def test_execute_sql_pandas_pyspark_livy(self): mock_spark_session = MagicMock() client = PandasScalaLivyClient(mock_spark_session, 10) # Set up spark session to return JSON result_json = "{\"buildingID\":0,\"date\":\"6/1/13\",\"temp_diff\":12}\n" \ "{\"buildingID\":1,\"date\":\"6/1/13\",\"temp_diff\":0}" self.execute_responses = [result_json] mock_spark_session.execute.side_effect = self._next_response_execute # pandas to return records = [{u'buildingID': 0, u'date': u'6/1/13', u'temp_diff': 12}, {u'buildingID': 1, u'date': u'6/1/13', u'temp_diff': 0}] desired_result = pd.DataFrame(records) command = "command" result = client.execute_sql(command) # Verify basic calls were done mock_spark_session.create_sql_context.assert_called_with() mock_spark_session.wait_for_state.assert_called_with("idle") # Verify result is desired pandas dataframe assert_frame_equal(desired_result, result)
def test_execute_code_pandas_pyspark_livy(self): mock_spark_session = MagicMock() client = PandasScalaLivyClient(mock_spark_session, 10) command = "command" client.execute(command) mock_spark_session.wait_for_state.assert_called_with("idle") mock_spark_session.execute.assert_called_with(command)
def test_execute_sql_pandas_scala_livy_no_results(self): mock_spark_session = MagicMock() client = PandasScalaLivyClient(mock_spark_session, 10) # Set up spark session to return JSON result_json = "" result_columns = "res1: Array[String] = Array(buildingID, date, temp_diff)" self.execute_responses = [result_json, result_columns] mock_spark_session.execute.side_effect = self._next_response_execute # pandas to return columns = ["buildingID", "date", "temp_diff"] desired_result = pd.DataFrame.from_records(list(), columns=columns) command = "command" result = client.execute_sql(command) # Verify basic calls were done mock_spark_session.create_sql_context.assert_called_with() mock_spark_session.wait_for_state.assert_called_with("idle") # Verify result is desired pandas dataframe assert_frame_equal(desired_result, result)