def test_delete_migration(self) -> None: migration = DeleteFromRawTableMigration( migrations_file=self._migration_file_path_for_tag("tagC"), update_datetime_filters=[_DATE_1], filters=[("COL1", "31415")], ) with local_project_id_override("recidiviz-456"): project_1_queries = RawTableMigrationGenerator.migration_queries( [migration]) with local_project_id_override("recidiviz-789"): project_2_queries = RawTableMigrationGenerator.migration_queries( [migration]) expected_project_1_queries = [ """DELETE FROM `recidiviz-456.us_xx_raw_data.tagC` WHERE STRUCT(COL1, update_datetime) IN ( STRUCT('31415', '2020-04-14T00:31:00') );""" ] self.assertEqual(expected_project_1_queries, project_1_queries) expected_project_2_queries = [ """DELETE FROM `recidiviz-789.us_xx_raw_data.tagC` WHERE STRUCT(COL1, update_datetime) IN ( STRUCT('31415', '2020-04-14T00:31:00') );""" ] self.assertEqual(expected_project_2_queries, project_2_queries)
def test_print_migrations(self) -> None: migrations_file = self._migration_file_path_for_tag("tagC") migrations = [ DeleteFromRawTableMigration( migrations_file=migrations_file, update_datetime_filters=[_DATE_1, _DATE_2], filters=[("COL1", "31415")], ), DeleteFromRawTableMigration( migrations_file=migrations_file, update_datetime_filters=None, filters=[("COL2", "1234")], ), UpdateRawTableMigration( migrations_file=migrations_file, update_datetime_filters=[ _DATE_1, ], filters=[("COL1", "31415")], updates=[("COL1", "91011")], ), UpdateRawTableMigration( migrations_file=migrations_file, update_datetime_filters=None, filters=[("COL1", "31415")], updates=[("COL2", "91011")], ), ] # Shouldn't crash with local_project_id_override("recidiviz-456"): RawTableMigrationGenerator.print_list(migrations)
def test_update_migration_update_datetime_agnostic(self) -> None: migration = UpdateRawTableMigration( migrations_file=self._migration_file_path_for_tag("tagC"), update_datetime_filters=None, filters=[("COL1", "31415")], updates=[("COL1", "91011")], ) with local_project_id_override("recidiviz-456"): project_1_queries = RawTableMigrationGenerator.migration_queries( [migration]) with local_project_id_override("recidiviz-789"): project_2_queries = RawTableMigrationGenerator.migration_queries( [migration]) expected_project_1_queries = [ """UPDATE `recidiviz-456.us_xx_raw_data.tagC` original SET COL1 = updates.new__COL1 FROM (SELECT * FROM UNNEST([ STRUCT('31415' AS COL1, '91011' AS new__COL1) ])) updates WHERE original.COL1 = updates.COL1;""" ] self.assertEqual(expected_project_1_queries, project_1_queries) expected_project_2_queries = [ """UPDATE `recidiviz-789.us_xx_raw_data.tagC` original SET COL1 = updates.new__COL1 FROM (SELECT * FROM UNNEST([ STRUCT('31415' AS COL1, '91011' AS new__COL1) ])) updates WHERE original.COL1 = updates.COL1;""" ] self.assertEqual(expected_project_2_queries, project_2_queries)
def test_dont_merge_delete_and_update_migrations(self) -> None: migrations_file = self._migration_file_path_for_tag("tagC") migrations: List[RawTableMigration] = [ DeleteFromRawTableMigration( migrations_file=migrations_file, update_datetime_filters=[_DATE_1], filters=[("COL1", "1234")], ), UpdateRawTableMigration( migrations_file=migrations_file, update_datetime_filters=[_DATE_1], filters=[("COL1", "456")], updates=[("COL2", "4567")], ), ] with local_project_id_override("recidiviz-456"): queries_map = RawTableMigrationGenerator.migration_queries( migrations) expected_queries_map = [ f"""DELETE FROM `recidiviz-456.us_xx_raw_data.tagC` WHERE STRUCT(COL1, update_datetime) IN ( STRUCT('1234', '{_DATE_1.isoformat()}') );""", f"""UPDATE `recidiviz-456.us_xx_raw_data.tagC` original SET COL2 = updates.new__COL2 FROM (SELECT * FROM UNNEST([ STRUCT('456' AS COL1, CAST('{_DATE_1.isoformat()}' AS DATETIME) AS update_datetime, '4567' AS new__COL2) ])) updates WHERE original.COL1 = updates.COL1 AND original.update_datetime = updates.update_datetime;""", ] self.assertEqual(expected_queries_map, queries_map)
def test_merge_update_migrations_no_datetime_filter(self) -> None: migrations_file = self._migration_file_path_for_tag("tagC") migrations: List[RawTableMigration] = [ UpdateRawTableMigration( migrations_file=migrations_file, update_datetime_filters=None, filters=[("COL1", "123")], updates=[("COL2", "1234")], ), UpdateRawTableMigration( migrations_file=migrations_file, update_datetime_filters=None, filters=[("COL1", "456")], updates=[("COL2", "4567")], ), ] with local_project_id_override("recidiviz-456"): queries_map = RawTableMigrationGenerator.migration_queries( migrations) expected_queries_map = [ """UPDATE `recidiviz-456.us_xx_raw_data.tagC` original SET COL2 = updates.new__COL2 FROM (SELECT * FROM UNNEST([ STRUCT('123' AS COL1, '1234' AS new__COL2), STRUCT('456' AS COL1, '4567' AS new__COL2) ])) updates WHERE original.COL1 = updates.COL1;""" ] self.assertEqual(expected_queries_map, queries_map)
def test_dont_merge_delete_migrations_different_filter_keys_same_values( self, ) -> None: migrations_file = self._migration_file_path_for_tag("tagC") migrations: List[RawTableMigration] = [ DeleteFromRawTableMigration( migrations_file=migrations_file, update_datetime_filters=[_DATE_1], filters=[("COL1", "1234")], ), DeleteFromRawTableMigration( migrations_file=migrations_file, update_datetime_filters=[_DATE_1], filters=[("COL2", "1234")], ), ] with local_project_id_override("recidiviz-456"): queries_map = RawTableMigrationGenerator.migration_queries( migrations) expected_queries_map = [ f"""DELETE FROM `recidiviz-456.us_xx_raw_data.tagC` WHERE STRUCT(COL1, update_datetime) IN ( STRUCT('1234', '{_DATE_1.isoformat()}') );""", f"""DELETE FROM `recidiviz-456.us_xx_raw_data.tagC` WHERE STRUCT(COL2, update_datetime) IN ( STRUCT('1234', '{_DATE_1.isoformat()}') );""", ] self.assertEqual(expected_queries_map, queries_map)
def collect_raw_table_migration_queries(self) -> Dict[str, List[str]]: """Finds all migrations defined for this region and returns a dict indexing all migration queries that should be run for a given raw file. """ migrations_list = self.collect_raw_table_migrations() migrations_by_file_tag = defaultdict(list) for migration in migrations_list: migrations_by_file_tag[migration.file_tag].append(migration) return { file_tag: RawTableMigrationGenerator.migration_queries(migrations) for file_tag, migrations in migrations_by_file_tag.items() }
def test_delete_migration_multiple_filters_and_dates(self) -> None: migration = DeleteFromRawTableMigration( migrations_file=self._migration_file_path_for_tag( "file_tag_first"), update_datetime_filters=[_DATE_1, _DATE_2], filters=[("col_name_1a", "31415"), ("col_name_1b", "45678")], ) with local_project_id_override("recidiviz-456"): queries = RawTableMigrationGenerator.migration_queries([migration]) expected_queries = [ """DELETE FROM `recidiviz-456.us_xx_raw_data.file_tag_first` WHERE STRUCT(col_name_1a, col_name_1b, update_datetime) IN ( STRUCT('31415', '45678', '2020-04-14T00:31:00'), STRUCT('31415', '45678', '2020-08-16T01:02:03') );""" ] self.assertEqual(expected_queries, queries)
def test_merge_delete_migrations_one_missing_datetime_filter(self) -> None: migrations_file = self._migration_file_path_for_tag("tagC") migrations: List[RawTableMigration] = [ DeleteFromRawTableMigration( migrations_file=migrations_file, update_datetime_filters=[_DATE_2], filters=[("COL1", "31415")], ), DeleteFromRawTableMigration( migrations_file=migrations_file, update_datetime_filters=None, filters=[("COL1", "6666")], ), DeleteFromRawTableMigration( migrations_file=migrations_file, update_datetime_filters=[_DATE_1], filters=[("COL1", "1234")], ), ] with local_project_id_override("recidiviz-456"): queries_map = RawTableMigrationGenerator.migration_queries( migrations) expected_queries_map = [ f"""DELETE FROM `recidiviz-456.us_xx_raw_data.tagC` WHERE STRUCT(COL1, update_datetime) IN ( STRUCT('31415', '{_DATE_2.isoformat()}'), STRUCT('1234', '{_DATE_1.isoformat()}') );""", """DELETE FROM `recidiviz-456.us_xx_raw_data.tagC` WHERE STRUCT(COL1) IN ( STRUCT('6666') );""", ] self.assertEqual(expected_queries_map, queries_map)
def test_update_migration_multiples(self) -> None: migration = UpdateRawTableMigration( migrations_file=self._migration_file_path_for_tag( "file_tag_first"), update_datetime_filters=[_DATE_1, _DATE_2], filters=[("col_name_1a", "12345"), ("col_name_1b", "4567")], updates=[("col_name_1a", "4567"), ("col_name_1b", "12345")], ) with local_project_id_override("recidiviz-456"): queries = RawTableMigrationGenerator.migration_queries([migration]) expected_queries = [ """UPDATE `recidiviz-456.us_xx_raw_data.file_tag_first` original SET col_name_1a = updates.new__col_name_1a, col_name_1b = updates.new__col_name_1b FROM (SELECT * FROM UNNEST([ STRUCT('12345' AS col_name_1a, '4567' AS col_name_1b, CAST('2020-04-14T00:31:00' AS DATETIME) AS update_datetime, '4567' AS new__col_name_1a, '12345' AS new__col_name_1b), STRUCT('12345' AS col_name_1a, '4567' AS col_name_1b, CAST('2020-08-16T01:02:03' AS DATETIME) AS update_datetime, '4567' AS new__col_name_1a, '12345' AS new__col_name_1b) ])) updates WHERE original.col_name_1a = updates.col_name_1a AND original.col_name_1b = updates.col_name_1b AND original.update_datetime = updates.update_datetime;""", ] for query in queries: print(query) self.assertEqual(expected_queries, queries)