def test_encode(self):
    """Wrapping a decomposed source in EncodedSource keeps its serialization.

    Builds the Source -> ParsedSource -> DecomposedSource chain from
    ``cte_1`` and checks that the decomposed source exposed by the encoded
    wrapper serializes to the same value as the one passed in.
    """
    decomposed = DecomposedSource(ParsedSource(Source(cte_1)))
    encoded = EncodedSource(decomposed)

    self.assertIsNotNone(encoded)

    serialized_before = decomposed.serialize()
    serialized_after = encoded.decomposed_source().serialize()
    self.assertEqual(serialized_before, serialized_after)
def _apply_dependency_first(encoded_source: EncodedSource,
                            apply_func: Callable[[str, str], bool]):
    """Run ``apply_func`` for the last statement's dependencies, then for it.

    Args:
        encoded_source: Object exposing ``encoded_dependencies()``,
            ``encoded_sources()`` and ``hashed_sources()``; only the last
            entry of each is used.
        apply_func: Callback invoked as ``apply_func(hash, statement)``.

    Returns:
        Whatever ``apply_func`` returns for the last statement of
        ``encoded_source``. Results of the recursive calls made for the
        dependencies are discarded.
    """
    # Every dependency (and, through recursion, its own dependencies) is
    # handled before the statement that relies on it.
    for upstream in encoded_source.encoded_dependencies()[-1]:
        _apply_dependency_first(upstream, apply_func)

    final_statement = encoded_source.encoded_sources()[-1]
    final_hash = encoded_source.hashed_sources()[-1]
    return apply_func(final_hash, final_statement)
def test_cte_complex_encode(self):
    """Encode ``complex_query`` and print every hash with its encoded source.

    Only asserts that an EncodedSource object is produced; the printed
    hash/source pairs are for manual inspection.
    """
    encoded_root = EncodedSource(
        DecomposedSource(ParsedSource(Source(complex_query))))
    self.assertIsNotNone(encoded_root)

    # Distinct loop names: the object under test is no longer rebound to a
    # plain string inside the loop, and the builtin ``hash`` is not shadowed.
    for source_hash, statement in zip(encoded_root.hashed_sources(),
                                      encoded_root.encoded_sources()):
        print(f"-----------------------------------\n"
              f"{source_hash}"
              f"\n-----------------------------------\n"
              f"{statement}")
def test_cte_date_dim_encode(self):
    """Encode ``date_dim_query``, print its parts, then check the bare select.

    The final assertion expects ``date_dim_select`` on its own to yield
    exactly one entry in ``all_encoded_sources_by_name()``.
    """
    root = EncodedSource(
        DecomposedSource(ParsedSource(Source(date_dim_query))))
    self.assertIsNotNone(root)

    rule = "-----------------------------------"
    hashes = root.hashed_sources()
    statements = root.encoded_sources()
    for digest, statement in zip(hashes, statements):
        print(f"{rule}\n{digest}\n{rule}\n{statement}")

    select_only = EncodedSource.from_str(date_dim_select)
    sources_by_name = select_only.all_encoded_sources_by_name()
    self.assertEqual(len(sources_by_name), 1)
def test_apply_dependency_first_basic(self):
    """Both CTEs of ``basic_str`` are applied before the joining statement.

    The expected final statement is the join with each CTE body replaced by
    a ``SELECT * FROM`` on the hash obtained from encoding that CTE alone.
    """
    datasource = DataSource(EncodedSource.from_str(basic_str))

    hashed_cte_1 = EncodedSource.from_str(cte_1).hashed_sources()[-1]
    hashed_cte_2 = EncodedSource.from_str(cte_2).hashed_sources()[-1]
    encoded_join = (f"WITH cte AS (SELECT * FROM `{hashed_cte_1}`),\n"
                    f" cte2 AS (SELECT * FROM `{hashed_cte_2}`)\n"
                    f"{join_clause}")
    expected_run_order = [cte_1, cte_2, encoded_join]

    run_order = []

    def apply_to_encoded(_hash: str, source: str) -> None:
        # Record the order in which statements are handed to the callback.
        # ``run_order`` is captured by closure rather than smuggled in as a
        # mutable default argument; the hash is not needed here.
        run_order.append(source)

    datasource.apply_dependency_first(apply_to_encoded)
    self.assertEqual(expected_run_order, run_order)
def test_apply_dependency_first_date_cached(self):
    """Check the de-duplicated dependency-first order for the cached query.

    ``date_dim_query_sub_cached`` is encoded with ``prefix="cached_"``. The
    callback records each hash only the first time it is seen, and the
    resulting order is compared with: the date-dim table, the week-dim
    statement reading from the hashed date-dim, the weeks statement reading
    from the hashed week-dim, and finally the root's last encoded source.
    """
    self.maxDiff = None  # do not truncate the diff when the lists differ

    encoded_source_root = EncodedSource.from_str(date_dim_query_sub_cached,
                                                 prefix="cached_")
    datasource = DataSource(encoded_source_root)

    # The result is not used below; the call is kept so that ``settings``
    # still goes through ``EncodedSource.from_str`` as it did before.
    EncodedSource.from_str(settings)

    encoded_date_dim = EncodedSource.from_str(planning_date_dim_table)
    parsed_week_dim = ParsedSource(
        Source(planning_week_dim_table)).serialize()
    encoded_replaced_date_dim = (
        "WITH planning_date_dim_table AS (SELECT * FROM "
        f"`{encoded_date_dim.hashed_sources()[-1]}`)\n")

    encoded_week_dim = EncodedSource.from_str(
        f"WITH planning_date_dim_table AS ({planning_date_dim_table}) "
        f"{planning_week_dim_table}")
    encoded_replaced_week_dim = (
        "WITH planning_week_dim_table AS (SELECT * FROM "
        f"`{encoded_week_dim.hashed_sources()[-1]}`)\n")
    replaced_weeks = ParsedSource(Source(weeks)).serialize()

    expected_run_order = [
        encoded_date_dim.encoded_sources()[-1],
        encoded_replaced_date_dim + parsed_week_dim,
        encoded_replaced_week_dim + replaced_weeks,
        encoded_source_root.encoded_sources()[-1],
    ]

    run_order = []
    already_run = {}

    def apply_to_encoded(source_hash: str, source: str) -> None:
        print(f"hash:{source_hash}, source:{source}")
        # Membership test instead of truthiness of the stored value, so a
        # hash whose recorded source is empty still counts as already run.
        if source_hash not in already_run:
            already_run[source_hash] = source
            run_order.append(source)

    datasource.apply_dependency_first(apply_to_encoded)
    self.assertEqual(expected_run_order, run_order)
def test_apply_dependency_first(self):
    """Applying dependency-first to ``cte_1`` alone yields only ``cte_1``."""
    datasource = DataSource(EncodedSource.from_str(cte_1))
    expected_run_order = [cte_1]

    run_order = []

    def apply_to_encoded(_hash: str, source: str) -> None:
        # Closure capture replaces the mutable-default-argument binding;
        # the hash is not needed for this check.
        run_order.append(source)

    datasource.apply_dependency_first(apply_to_encoded)
    # Expected value first, matching the sibling dependency-first tests.
    self.assertEqual(expected_run_order, run_order)
def test_cte_encode_matches(self):
    """``basic_str`` and ``basic_whitespace_str`` encode to equal sources."""

    def encode(text):
        # Full pipeline: Source -> ParsedSource -> DecomposedSource -> EncodedSource.
        return EncodedSource(DecomposedSource(ParsedSource(Source(text))))

    reference = encode(basic_str)
    whitespace_variant = encode(basic_whitespace_str)

    self.assertIsNotNone(whitespace_variant)
    self.assertEqual(reference.encoded_sources(),
                     whitespace_variant.encoded_sources())
def test_match_whitespace_diff(self):
    """Two queries that differ as raw text still encode to equal sources.

    The raw strings and the ``Source.source()`` values must differ, while
    the encoded sources must compare equal.
    """
    source_str = "SELECT * FROM `universe.galaxy.system`"
    target_str = "SELECT * FROM `universe.galaxy.system` "

    source = Source(source_str)
    target = Source(target_str)
    encoded_source, encoded_target = [
        EncodedSource(DecomposedSource(ParsedSource(raw)))
        for raw in (source, target)
    ]

    for encoded in (encoded_source, encoded_target):
        self.assertIsNotNone(encoded)

    # The inputs really are different text ...
    self.assertNotEqual(source_str, target_str)
    self.assertNotEqual(source.source(), target.source())
    # ... yet their encodings agree.
    self.assertEqual(encoded_source.encoded_sources(),
                     encoded_target.encoded_sources())
def test_cte_date_dim_encode_cached(self):
    """``EncodedSource.from_str`` with ``prefix="cached_"`` returns an object."""
    # NOTE(review): despite "date_dim" in the name, this test encodes
    # ``basic_str`` -- confirm that is intended.
    cached_root = EncodedSource.from_str(basic_str, prefix="cached_")
    self.assertIsNotNone(cached_root)
def test_cte_basic_encode(self):
    """The explicit constructor chain produces an EncodedSource for ``basic_str``."""
    parsed = ParsedSource(Source(basic_str))
    encoded = EncodedSource(DecomposedSource(parsed))
    self.assertIsNotNone(encoded)
def test_complex_encode(self):
    """A DataSource can be built from the encoded ``complex_query``."""
    encoded_root = EncodedSource.from_str(complex_query)
    self.assertIsNotNone(DataSource(encoded_root))
def test_simplified_complex_encode(self):
    """A DataSource can be built from the ``complex_simplified.sql`` fixture.

    The fixture is first looked up at the original working-directory
    relative location; if it is not there, the same relative path is
    resolved against the directory containing this test module, so the
    test no longer depends on where the runner was started.
    """
    from pathlib import Path

    relative_path = Path("../resources/complex_simplified.sql")
    sql_path = relative_path
    if not sql_path.is_file():
        # Fall back to a location anchored at this file rather than the
        # current working directory.
        sql_path = Path(__file__).resolve().parent / relative_path

    with open(sql_path, "r") as sql_file:
        datasource = DataSource(
            EncodedSource.from_str(sql_file.read(), prefix="cached_"))
    self.assertIsNotNone(datasource)
def test_basicsource(self):
    """Print every hash and its encoded sources for ``date_dim_query``.

    Contains no assertions; it only fails if building or iterating raises.
    """
    encoded_root = EncodedSource.from_str(date_dim_query)
    datasource = DataSource(encoded_root)

    all_sources = datasource.all_encoded_sources()
    for key, entry in all_sources.items():
        line = f"hash:{key} source:{entry.encoded_sources()}"
        print(line)
def test_datasource(self):
    """A DataSource can be built from the encoded ``cte_1``."""
    encoded_root = EncodedSource.from_str(cte_1)
    built = DataSource(encoded_root)
    self.assertIsNotNone(built)
def test_from_encode(self):
    """``EncodedSource.from_str`` returns an object for ``cte_1``."""
    self.assertIsNotNone(EncodedSource.from_str(cte_1))