def test_hive_error_msg(self):
    """extract_error_message should pull the Hive error text out of a raw
    Thrift-style exception string, and fall back to str(exception) when the
    message does not match the expected pattern."""
    msg = (
        '{...} errorMessage="Error while compiling statement: FAILED: '
        "SemanticException [Error 10001]: Line 4"
        ":5 Table not found 'fact_ridesfdslakj'\", statusCode=3, "
        "sqlState='42S02', errorCode=10001)){...}"
    )
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(
        (
            "Error while compiling statement: FAILED: "
            "SemanticException [Error 10001]: Line 4:5 "
            "Table not found 'fact_ridesfdslakj'"
        ),
        HiveEngineSpec.extract_error_message(Exception(msg)),
    )

    # No match against the regex: the full exception string comes back verbatim.
    e = Exception("Some string that doesn't match the regex")
    self.assertEqual(str(e), HiveEngineSpec.extract_error_message(e))

    msg = (
        "errorCode=10001, "
        'errorMessage="Error while compiling statement"), operationHandle'
        '=None)"'
    )
    self.assertEqual(
        "Error while compiling statement",
        HiveEngineSpec.extract_error_message(Exception(msg)),
    )
def test_job_2_launched_stage_2_stages_progress(self):  # pylint: disable=invalid-name
    """With 2 total jobs, job 1 complete (50%) and job 2 at 40% map / 0%
    reduce, overall progress averages out to 60%."""
    # NOTE(review): original multi-line log literal was flattened upstream;
    # reconstructed line-per-entry — confirm leading-whitespace insensitivity.
    log_text = """
        17/02/07 19:15:55 INFO ql.Driver: Total jobs = 2
        17/02/07 19:15:55 INFO ql.Driver: Launching Job 1 out of 2
        17/02/07 19:16:09 INFO exec.Task: 2017-02-07 19:16:09,173 Stage-1 map = 100%, reduce = 0%
        17/02/07 19:15:55 INFO ql.Driver: Launching Job 2 out of 2
        17/02/07 19:16:09 INFO exec.Task: 2017-02-07 19:16:09,173 Stage-1 map = 0%, reduce = 0%
        17/02/07 19:16:09 INFO exec.Task: 2017-02-07 19:16:09,173 Stage-1 map = 40%, reduce = 0%
    """
    self.assertEqual(60, HiveEngineSpec.progress(log_text.split("\n")))
def test_df_to_sql_if_exists_replace_with_schema(mock_upload_to_s3, mock_g):
    """df_to_sql with if_exists="replace" must DROP the schema-qualified
    table before recreating it."""
    mock_upload_to_s3.return_value = "mock-location"
    mock_g.user = True

    database = mock.MagicMock()
    database.get_df.return_value.empty = False
    execute_mock = mock.MagicMock(return_value=True)
    database.get_sqla_engine.return_value.execute = execute_mock

    schema, table_name = "schema", "foobar"
    HiveEngineSpec.df_to_sql(
        database,
        Table(table=table_name, schema=schema),
        pd.DataFrame(),
        {"if_exists": "replace", "header": 1, "na_values": "mock", "sep": "mock"},
    )

    execute_mock.assert_any_call(f"DROP TABLE IF EXISTS {schema}.{table_name}")
def test_create_table_from_csv_if_exists_replace(mock_upload_to_s3, mock_table, mock_g):
    """create_table_from_csv with if_exists="replace" must DROP the existing
    table before creating the new one."""
    mock_upload_to_s3.return_value = "mock-location"
    mock_table.infer.return_value = {}
    mock_g.user = True

    database = mock.MagicMock()
    database.get_df.return_value.empty = False
    execute_mock = mock.MagicMock(return_value=True)
    database.get_sqla_engine.return_value.execute = execute_mock

    table_name = "foobar"
    HiveEngineSpec.create_table_from_csv(
        "foo.csv",
        Table(table=table_name),
        database,
        {"sep": "mock", "header": 1, "na_values": "mock"},
        {"if_exists": "replace"},
    )

    execute_mock.assert_any_call(f"DROP TABLE IF EXISTS {table_name}")
def test_hive_get_view_names_return_empty_list(self):  # pylint: disable=invalid-name
    """Hive's get_view_names returns an empty list regardless of inputs."""
    view_names = HiveEngineSpec.get_view_names(mock.ANY, mock.ANY, mock.ANY)
    self.assertEqual([], view_names)
def epoch_to_dttm(cls) -> str:
    """Delegate to HiveEngineSpec's epoch-to-datetime SQL expression."""
    return HiveEngineSpec.epoch_to_dttm()
def convert_dttm(cls, target_type: str, dttm: datetime) -> Optional[str]:
    """Delegate datetime-literal conversion to HiveEngineSpec.convert_dttm."""
    return HiveEngineSpec.convert_dttm(target_type, dttm)
def is_readonly(sql: str) -> bool:
    """Return True when *sql* parses as a read-only query under Hive rules."""
    parsed = ParsedQuery(sql)
    return HiveEngineSpec.is_readonly_query(parsed)
def is_correct_result(data: List, result: List) -> bool:
    """Check that the latest partition extracted from *data* equals *result*."""
    frame = pd.DataFrame({"partition": data})
    latest = HiveEngineSpec._latest_partition_from_df(frame)
    return latest == result
def test_job_1_launched_progress(self):
    """A launched job with no stage lines yet reports 0% progress."""
    # NOTE(review): log literal reconstructed from flattened source.
    log = """
        17/02/07 19:15:55 INFO ql.Driver: Total jobs = 2
        17/02/07 19:15:55 INFO ql.Driver: Launching Job 1 out of 2
    """.split("\n")
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(0, HiveEngineSpec.progress(log))
def test_hive_get_view_names_return_empty_list():  # pylint: disable=invalid-name
    """Hive's get_view_names always yields an empty list."""
    view_names = HiveEngineSpec.get_view_names(mock.ANY, mock.ANY, mock.ANY)
    assert view_names == []
def convert_dttm(
    cls, target_type: str, dttm: datetime, db_extra: Optional[Dict[str, Any]] = None
) -> Optional[str]:
    """Delegate datetime-literal conversion to HiveEngineSpec.convert_dttm,
    forwarding any engine-specific extras via *db_extra*."""
    return HiveEngineSpec.convert_dttm(target_type, dttm, db_extra=db_extra)
def test_number_of_jobs_progress():
    """Only a 'Total jobs' line (nothing launched yet) means 0% progress."""
    log_text = """
        17/02/07 19:15:55 INFO ql.Driver: Total jobs = 2
    """
    assert HiveEngineSpec.progress(log_text.split("\n")) == 0
def test_fetch_data_success(fetch_data_mock):
    """fetch_data passes through whatever the underlying fetch returns."""
    expected = ["a", "b"]
    fetch_data_mock.return_value = expected
    assert HiveEngineSpec.fetch_data(mock.Mock()) == expected
def test_fetch_data_programming_error(fetch_data_mock):
    """A pyhive ProgrammingError during fetch is swallowed, yielding []."""
    from pyhive.exc import ProgrammingError

    fetch_data_mock.side_effect = ProgrammingError
    assert HiveEngineSpec.fetch_data(mock.Mock()) == []
def test_0_progress(self):
    """Compile/parse PerfLogger lines carry no job info, so progress is 0%."""
    # NOTE(review): log literal reconstructed from flattened source.
    log = """
        17/02/07 18:26:27 INFO log.PerfLogger: <PERFLOG method=compile from=org.apache.hadoop.hive.ql.Driver>
        17/02/07 18:26:27 INFO log.PerfLogger: <PERFLOG method=parse from=org.apache.hadoop.hive.ql.Driver>
    """.split("\n")
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(0, HiveEngineSpec.progress(log))
def test_number_of_jobs_progress(self):
    """Only a 'Total jobs' line (nothing launched yet) means 0% progress."""
    log = """
        17/02/07 19:15:55 INFO ql.Driver: Total jobs = 2
    """.split("\n")
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(0, HiveEngineSpec.progress(log))
def test_create_table_from_csv_append() -> None:
    """Appending via CSV upload is unsupported and must raise SupersetException."""
    with pytest.raises(SupersetException):
        HiveEngineSpec.create_table_from_csv(
            "foo.csv",
            Table("foobar"),
            mock.MagicMock(),
            {},
            {"if_exists": "append"},
        )
def test_hive_get_view_names_return_empty_list(self):
    """Hive's get_view_names returns an empty list regardless of inputs."""
    # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual.
    self.assertEqual(
        [], HiveEngineSpec.get_view_names(mock.ANY, mock.ANY, mock.ANY)
    )
def test_get_create_table_stmt() -> None:
    """get_create_table_stmt builds CREATE TABLE DDL plus its bind params.

    tblproperties are emitted only when a header line count is supplied, and
    'serialization.null.format' only when null values are supplied.
    """
    # Fixed defects: the expected-SQL literal was duplicated three times, and
    # the function mutated unittest.TestCase.maxDiff globally (a hidden side
    # effect; pytest shows full diffs for plain asserts anyway).
    table = Table("employee")
    schema_def = """eid int, name String, salary String, destination String"""
    location = "s3a://directory/table"

    # NOTE(review): whitespace inside these expected strings was flattened in
    # the source under review — confirm against HiveEngineSpec's actual output.
    base_stmt = (
        "CREATE TABLE employee ( eid int, name String, salary String, "
        "destination String ) ROW FORMAT DELIMITED FIELDS TERMINATED BY :delim "
        "STORED AS TEXTFILE LOCATION :location"
    )
    props_stmt = (
        base_stmt
        + " tblproperties ('skip.header.line.count'=:header_line_count,"
        " 'serialization.null.format'=:null_value)"
    )
    header_only_stmt = (
        base_stmt + " tblproperties ('skip.header.line.count'=:header_line_count)"
    )

    # Header count 0 with an empty-string null marker.
    assert HiveEngineSpec.get_create_table_stmt(
        table, schema_def, location, ",", 0, [""]
    ) == (
        props_stmt,
        {
            "delim": ",",
            "location": location,
            "header_line_count": "1",
            "null_value": "",
        },
    )

    # Multiple null markers: only the first is bound.
    assert HiveEngineSpec.get_create_table_stmt(
        table, schema_def, location, ",", 1, ["1", "2"]
    ) == (
        props_stmt,
        {
            "delim": ",",
            "location": location,
            "header_line_count": "2",
            "null_value": "1",
        },
    )

    # Large header count: bound value is count + 1.
    assert HiveEngineSpec.get_create_table_stmt(
        table, schema_def, location, ",", 100, ["NaN"]
    ) == (
        props_stmt,
        {
            "delim": ",",
            "location": location,
            "header_line_count": "101",
            "null_value": "NaN",
        },
    )

    # No header count and no null values: no tblproperties clause at all.
    assert HiveEngineSpec.get_create_table_stmt(
        table, schema_def, location, ",", None, None
    ) == (
        base_stmt,
        {"delim": ",", "location": location},
    )

    # Header count but no null values: only skip.header.line.count is emitted.
    assert HiveEngineSpec.get_create_table_stmt(
        table, schema_def, location, ",", 100, []
    ) == (
        header_only_stmt,
        {"delim": ",", "location": location, "header_line_count": "101"},
    )