Exemplo n.º 1
0
 def _get_raw_data_file_configs(
         self) -> Dict[str, DirectIngestRawFileConfig]:
     """Return a fixed set of raw file configs keyed by file tag.

     The mapping contains, in order, 'tagA', 'tagB', 'tagC' (each with
     'mockKey' as its single primary key column) and 'tagWeDoNotIngest'
     (with no primary key columns). All other settings are the same for
     every entry.
     """
     # Only the primary key columns differ between the entries.
     primary_keys_by_tag = {
         'tagA': ['mockKey'],
         'tagB': ['mockKey'],
         'tagC': ['mockKey'],
         'tagWeDoNotIngest': [],
     }
     return {
         tag: DirectIngestRawFileConfig(
             file_tag=tag,
             primary_key_cols=key_cols,
             datetime_cols=[],
             supplemental_order_by_clause='',
             encoding='UTF-8',
             separator=',',
             ignore_quotes=False,
             always_historical_export=False,
         )
         for tag, key_cols in primary_keys_by_tag.items()
     }
    def test_raw_latest_view(self):
        """Check the identifiers and queries of a latest view.

        Builds a DirectIngestRawDataTableLatestView for region 'us_xx'
        from a config with two primary key columns and a supplemental
        order-by clause, then compares its project, dataset id, table id,
        view id, view query and select query against expected values.
        """
        file_config = DirectIngestRawFileConfig(
            file_tag='table_name',
            primary_key_cols=['col1', 'col2'],
            datetime_cols=[],
            supplemental_order_by_clause='CAST(seq_num AS INT64)',
            encoding='any-encoding',
            separator='@',
            ignore_quotes=False)
        latest_view = DirectIngestRawDataTableLatestView(
            region_code='us_xx',
            raw_file_config=file_config)

        # Identifiers of the generated view.
        self.assertEqual(self.PROJECT_ID, latest_view.project)
        self.assertEqual('us_xx_raw_data_up_to_date_views',
                         latest_view.dataset_id)
        self.assertEqual('table_name_latest', latest_view.table_id)
        self.assertEqual('table_name_latest', latest_view.view_id)

        # The view query is the template filled in with these values.
        template_args = {
            'project_id': self.PROJECT_ID,
            'raw_table_primary_key_str': 'col1, col2',
            'raw_table_dataset_id': 'us_xx_raw_data',
            'raw_table_name': 'table_name',
            'except_clause': 'EXCEPT (file_id, update_datetime)',
            'datetime_cols_clause': '',
            'supplemental_order_by_clause': ', CAST(seq_num AS INT64)',
        }
        expected_query = RAW_DATA_LATEST_VIEW_QUERY_TEMPLATE.format(
            **template_args)
        self.assertEqual(expected_query, latest_view.view_query)

        expected_select = (
            'SELECT * FROM '
            '`recidiviz-456.us_xx_raw_data_up_to_date_views.table_name_latest`'
        )
        self.assertEqual(expected_select, latest_view.select_query)
    def test_raw_up_to_date_view(self) -> None:
        """Check the identifiers and queries of an up-to-date view.

        Builds a DirectIngestRawDataTableUpToDateView for region "us_xx"
        from a config with three columns (one non-datetime column with a
        description, one datetime column with a description, and one
        datetime column whose description is None), then compares its
        project, dataset id, table id, view id, view query and select
        query against expected values.
        """
        column_infos = [
            RawTableColumnInfo(name="col1",
                               is_datetime=False,
                               description="col1 description"),
            RawTableColumnInfo(name="col2",
                               is_datetime=True,
                               description="col2 description"),
            RawTableColumnInfo(name="undocumented_column",
                               is_datetime=True,
                               description=None),
        ]
        file_config = DirectIngestRawFileConfig(
            file_tag="table_name",
            file_path="path/to/file.yaml",
            file_description="file description",
            primary_key_cols=["col1"],
            columns=column_infos,
            supplemental_order_by_clause="",
            encoding="any-encoding",
            separator="@",
            ignore_quotes=False,
            always_historical_export=False,
        )
        up_to_date_view = DirectIngestRawDataTableUpToDateView(
            region_code="us_xx",
            raw_file_config=file_config,
        )

        # Identifiers of the generated view.
        self.assertEqual(self.PROJECT_ID, up_to_date_view.project)
        self.assertEqual("us_xx_raw_data_up_to_date_views",
                         up_to_date_view.dataset_id)
        self.assertEqual("table_name_by_update_date",
                         up_to_date_view.table_id)
        self.assertEqual("table_name_by_update_date",
                         up_to_date_view.view_id)

        # Expected datetime-normalizing expression for col2; the expected
        # columns clause below contains only col1 and this expression.
        col2_datetime_clause = """
        COALESCE(
            CAST(SAFE_CAST(col2 AS DATETIME) AS STRING),
            CAST(SAFE_CAST(SAFE.PARSE_DATE('%m/%d/%y', col2) AS DATETIME) AS STRING),
            CAST(SAFE_CAST(SAFE.PARSE_DATE('%m/%d/%Y', col2) AS DATETIME) AS STRING),
            CAST(SAFE_CAST(SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M', col2) AS DATETIME) AS STRING),
            CAST(SAFE_CAST(SAFE.PARSE_TIMESTAMP('%m/%d/%Y %H:%M:%S', col2) AS DATETIME) AS STRING),
            col2
        ) AS col2"""

        template_args = {
            "project_id": self.PROJECT_ID,
            "raw_table_primary_key_str": "col1",
            "raw_table_dataset_id": "us_xx_raw_data",
            "raw_table_name": "table_name",
            "columns_clause": "col1, " + col2_datetime_clause,
            "legacy_except_clause": "",
            "legacy_datetime_cols_clause": "",
            "supplemental_order_by_clause": "",
        }
        expected_query = RAW_DATA_UP_TO_DATE_VIEW_QUERY_TEMPLATE.format(
            **template_args)
        self.assertEqual(expected_query, up_to_date_view.view_query)

        expected_select = (
            "SELECT * FROM "
            "`recidiviz-456.us_xx_raw_data_up_to_date_views.table_name_by_update_date`"
        )
        self.assertEqual(expected_select, up_to_date_view.select_query)
 def _get_raw_data_file_configs(
         self) -> Dict[str, DirectIngestRawFileConfig]:
     """Return a fixed set of raw file configs keyed by file tag.

     The mapping contains, in order, 'tagA', 'tagB', 'tagC' and
     'tagWeDoNotIngest'. Every config has empty primary key and datetime
     column lists and identical encoding, separator and quoting settings;
     only the file tag differs.
     """
     file_tags = ('tagA', 'tagB', 'tagC', 'tagWeDoNotIngest')
     return {
         tag: DirectIngestRawFileConfig(
             file_tag=tag,
             primary_key_cols=[],
             datetime_cols=[],
             encoding='UTF-8',
             separator=',',
             ignore_quotes=False,
         )
         for tag in file_tags
     }
    def test_raw_latest_historical_file_view(self) -> None:
        """Check a latest view built from an always-historical config.

        Builds a DirectIngestRawDataTableLatestView for region "us_xx"
        from a config with always_historical_export=True and two
        non-datetime columns, then compares its project, dataset id,
        table id, view id, view query (against the historical-file
        template) and select query against expected values.
        """
        column_infos = [
            RawTableColumnInfo(name="col1",
                               is_datetime=False,
                               description="col1 description"),
            RawTableColumnInfo(name="col2",
                               is_datetime=False,
                               description="col2 description"),
        ]
        file_config = DirectIngestRawFileConfig(
            file_tag="table_name",
            file_path="path/to/file.yaml",
            file_description="file description",
            primary_key_cols=["col1", "col2"],
            columns=column_infos,
            supplemental_order_by_clause="CAST(seq_num AS INT64)",
            encoding="any-encoding",
            separator="@",
            ignore_quotes=False,
            always_historical_export=True,
        )
        latest_view = DirectIngestRawDataTableLatestView(
            region_code="us_xx",
            raw_file_config=file_config,
            dataset_overrides=None,
        )

        # Identifiers of the generated view.
        self.assertEqual(self.PROJECT_ID, latest_view.project)
        self.assertEqual("us_xx_raw_data_up_to_date_views",
                         latest_view.dataset_id)
        self.assertEqual("table_name_latest", latest_view.table_id)
        self.assertEqual("table_name_latest", latest_view.view_id)

        # The view query is the historical-file template filled in with
        # these values.
        template_args = {
            "project_id": self.PROJECT_ID,
            "raw_table_primary_key_str": "col1, col2",
            "raw_table_dataset_id": "us_xx_raw_data",
            "raw_table_name": "table_name",
            "columns_clause": "col1, col2",
            "legacy_except_clause": "",
            "legacy_datetime_cols_clause": "",
            "supplemental_order_by_clause": ", CAST(seq_num AS INT64)",
        }
        expected_query = (
            RAW_DATA_LATEST_HISTORICAL_FILE_VIEW_QUERY_TEMPLATE.format(
                **template_args))
        self.assertEqual(expected_query, latest_view.view_query)

        expected_select = (
            "SELECT * FROM "
            "`recidiviz-456.us_xx_raw_data_up_to_date_views.table_name_latest`"
        )
        self.assertEqual(expected_select, latest_view.select_query)
    def test_raw_up_to_date_historical_file_view(self) -> None:
        """Check an up-to-date view built from an always-historical config.

        Builds a DirectIngestRawDataTableUpToDateView for region 'us_xx'
        from a config with always_historical_export=True, primary key
        'col1' and datetime column 'col2', then compares its project,
        dataset id, table id, view id, view query (against the
        historical-file template) and select query against expected
        values.
        """
        file_config = DirectIngestRawFileConfig(
            file_tag='table_name',
            primary_key_cols=['col1'],
            datetime_cols=['col2'],
            supplemental_order_by_clause='',
            encoding='any-encoding',
            separator='@',
            ignore_quotes=False,
            always_historical_export=True,
        )
        up_to_date_view = DirectIngestRawDataTableUpToDateView(
            region_code='us_xx',
            raw_file_config=file_config,
        )

        # Identifiers of the generated view.
        self.assertEqual(self.PROJECT_ID, up_to_date_view.project)
        self.assertEqual('us_xx_raw_data_up_to_date_views',
                         up_to_date_view.dataset_id)
        self.assertEqual('table_name_by_update_date',
                         up_to_date_view.table_id)
        self.assertEqual('table_name_by_update_date',
                         up_to_date_view.view_id)

        # Expected datetime-normalizing expression for col2 (note the
        # trailing comma, which is part of the expected clause).
        col2_datetime_clause = """
        COALESCE(
            CAST(SAFE_CAST(col2 AS DATETIME) AS STRING),
            CAST(SAFE_CAST(SAFE.PARSE_DATE('%m/%d/%y', col2) AS DATETIME) AS STRING),
            CAST(SAFE_CAST(SAFE.PARSE_DATE('%m/%d/%Y', col2) AS DATETIME) AS STRING),
            CAST(SAFE_CAST(SAFE.PARSE_TIMESTAMP('%Y-%m-%d %H:%M', col2) AS DATETIME) AS STRING),
            CAST(SAFE_CAST(SAFE.PARSE_TIMESTAMP('%m/%d/%Y %H:%M:%S', col2) AS DATETIME) AS STRING),
            col2
        ) AS col2,"""

        template_args = {
            'project_id': self.PROJECT_ID,
            'raw_table_primary_key_str': 'col1',
            'raw_table_dataset_id': 'us_xx_raw_data',
            'raw_table_name': 'table_name',
            'except_clause': 'EXCEPT (col2, file_id, update_datetime)',
            'datetime_cols_clause': col2_datetime_clause,
            'supplemental_order_by_clause': '',
        }
        expected_query = (
            RAW_DATA_UP_TO_DATE_HISTORICAL_FILE_VIEW_QUERY_TEMPLATE.format(
                **template_args))
        self.assertEqual(expected_query, up_to_date_view.view_query)

        expected_select = (
            'SELECT * FROM '
            '`recidiviz-456.us_xx_raw_data_up_to_date_views.table_name_by_update_date`'
        )
        self.assertEqual(expected_select, up_to_date_view.select_query)
Exemplo n.º 7
0
 def _get_raw_data_file_configs(
         self) -> Dict[str, DirectIngestRawFileConfig]:
     """Return a fixed set of raw file configs keyed by file tag.

     The mapping contains, in order, "tagA", "tagB", "tagC" (each with a
     single non-datetime "mockKey" column that is also the primary key)
     and "tagWeDoNotIngest" (with no columns and no primary key). Each
     config's file path is "path/to/<tag>.yaml"; all other settings are
     the same for every entry.
     """

     def _config_for(tag: str, keyed: bool) -> DirectIngestRawFileConfig:
         # New list and column objects are created on every call so that
         # no two configs share mutable state.
         if keyed:
             key_cols = ["mockKey"]
             column_infos = [
                 RawTableColumnInfo(
                     name="mockKey",
                     description="mockKey description",
                     is_datetime=False,
                 )
             ]
         else:
             key_cols = []
             column_infos = []
         return DirectIngestRawFileConfig(
             file_tag=tag,
             file_path=f"path/to/{tag}.yaml",
             file_description="file description",
             primary_key_cols=key_cols,
             columns=column_infos,
             supplemental_order_by_clause="",
             encoding="UTF-8",
             separator=",",
             custom_line_terminator=None,
             ignore_quotes=False,
             always_historical_export=False,
         )

     configs_by_tag = {
         tag: _config_for(tag, keyed=True)
         for tag in ("tagA", "tagB", "tagC")
     }
     configs_by_tag["tagWeDoNotIngest"] = _config_for(
         "tagWeDoNotIngest", keyed=False)
     return configs_by_tag