Exemplo n.º 1
0
    def test_test_is_ingest_launched_in_env_staging(
            self, mock_environment: MagicMock) -> None:
        """Checks that fake regions report ingest as launched under "staging".

        With the mocked environment returning "staging", a default region, a
        staging region and a production region must all answer True.
        """
        mock_environment.return_value = "staging"

        # One entry per region flavour; each region is built and checked in
        # turn so construction and assertion stay interleaved.
        region_kwargs_cases = (
            {},
            {"environment": "staging"},
            {"environment": "production"},
        )
        for region_kwargs in region_kwargs_cases:
            candidate = fake_region(**region_kwargs)
            self.assertTrue(candidate.is_ingest_launched_in_env())
Exemplo n.º 2
0
    def test_handle_file_start_ingest_unsupported_region(
            self, mock_region, mock_environment):
        """Handles a file for a 'staging' region while the environment is 'production'.

        The request carries start_ingest='False'; the controller must receive
        handle_file(path, False) and the endpoint must answer 200.
        """
        region_code = 'us_nd'

        mock_environment.return_value = 'production'
        controller = create_autospec(GcsfsDirectIngestController)
        mock_region.return_value = fake_region(region_code=region_code,
                                               environment='staging',
                                               ingestor=controller)

        file_path = GcsfsFilePath.from_absolute_path(
            'bucket-us-nd/elite_offenders.csv')

        response = self.client.get(
            '/handle_direct_ingest_file',
            query_string={
                'region': region_code,
                'bucket': file_path.bucket_name,
                'relative_file_path': file_path.blob_name,
                'start_ingest': 'False',
            },
            headers={'X-Appengine-Cron': 'test-cron'})

        mock_region.assert_called_with('us_nd', is_direct_ingest=True)
        controller.handle_file.assert_called_with(file_path, False)

        # No crash is expected for the unsupported region: the controller is
        # responsible for not starting ingest, and should it start one by
        # accident, the schedule/process_job endpoints perform the
        # unlaunched-region check themselves.
        self.assertEqual(200, response.status_code)
Exemplo n.º 3
0
    def test_handle_file_start_ingest(self, mock_region, mock_environment):
        """Forwards start_ingest='True' to the controller as handle_file(path, True).

        Both the mocked environment and the fake region are 'production'.
        """
        region_code = 'us_nd'

        mock_environment.return_value = 'production'
        controller = create_autospec(GcsfsDirectIngestController)
        mock_region.return_value = fake_region(region_code=region_code,
                                               environment='production',
                                               ingestor=controller)
        file_path = GcsfsFilePath.from_absolute_path(
            'bucket-us-nd/elite_offenders.csv')

        response = self.client.get(
            '/handle_direct_ingest_file',
            query_string={
                'region': region_code,
                'bucket': file_path.bucket_name,
                'relative_file_path': file_path.blob_name,
                'start_ingest': 'True',
            },
            headers={'X-Appengine-Cron': 'test-cron'})

        controller.handle_file.assert_called_with(file_path, True)

        # The endpoint is expected to complete without an error status.
        self.assertEqual(200, response.status_code)
Exemplo n.º 4
0
    def test_process_job_unlaunched_region(self, mock_supported, mock_region,
                                           mock_environment):
        """/process_job answers 400 for a 'staging' region when running in 'production'."""
        mock_supported.return_value = ['us_ca', 'us_pa']
        region_code = 'us_ca'

        mock_environment.return_value = 'production'
        mock_region.return_value = fake_region(
            region_code=region_code,
            environment='staging',
            ingestor=create_autospec(GcsfsDirectIngestController))

        ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
        payload = json.dumps({
            'cloud_task_args': ingest_args.to_serializable(),
            'args_type': 'IngestArgs',
        }).encode()

        response = self.client.post('/process_job',
                                    query_string={'region': region_code},
                                    headers={'X-Appengine-Cron': 'test-cron'},
                                    data=payload)

        # Both the status code and the exact error text are part of the check.
        self.assertEqual(400, response.status_code)
        self.assertEqual(response.get_data().decode(),
                         "Bad environment [production] for region [us_ca].")
Exemplo n.º 5
0
    def test_process_job(self, mock_supported, mock_region, mock_environment):
        """/process_job answers 200 and runs the ingest job with the posted args.

        The mocked environment and the fake region are both 'staging'.
        """
        mock_supported.return_value = ['us_nd', 'us_pa']
        region_code = 'us_nd'

        mock_environment.return_value = 'staging'
        controller = create_autospec(GcsfsDirectIngestController)
        mock_region.return_value = fake_region(region_code=region_code,
                                               environment='staging',
                                               ingestor=controller)

        ingest_args = IngestArgs(datetime.datetime(year=2019, month=7, day=20))
        payload = json.dumps({
            'cloud_task_args': ingest_args.to_serializable(),
            'args_type': 'IngestArgs',
        }).encode()

        response = self.client.post('/process_job',
                                    query_string={'region': region_code},
                                    headers={'X-Appengine-Cron': 'test-cron'},
                                    data=payload)

        self.assertEqual(200, response.status_code)
        run_job = controller.run_ingest_job_and_kick_scheduler_on_completion
        run_job.assert_called_with(ingest_args)
Exemplo n.º 6
0
    def test_handle_file_start_ingest_unsupported_region(
            self, mock_region, mock_environment, mock_fs_factory_cls, client):
        """Expects DirectIngestError when starting ingest for a 'staging' region in 'production'.

        The request passes start_ingest='true' for region 'us_nd'; the error is
        expected to propagate out of the test client call, and the region
        lookup must have been made with is_direct_ingest=True.
        """
        region_code = 'us_nd'

        mock_environment.return_value = 'production'
        mock_region.return_value = fake_region(region_code=region_code,
                                               environment='staging')

        fake_fs = FakeDirectIngestGCSFileSystem()
        fake_fs.test_add_path('bucket-us-nd/elite_offenders.csv')
        mock_fs_factory_cls.build.return_value = fake_fs

        request_args = {
            'region': region_code,
            'bucket': 'bucket-us-nd',
            'relative_file_path': 'elite_offenders.csv',
            'start_ingest': 'true',
        }
        headers = {'X-Appengine-Cron': "test-cron"}

        # The call itself raises, so no response object is ever produced;
        # a status-code assertion placed after it inside this block would be
        # unreachable and is therefore intentionally absent.
        with pytest.raises(DirectIngestError):
            client.get('/handle_direct_ingest_file',
                       query_string=request_args,
                       headers=headers)

        mock_region.assert_called_with('us_nd', is_direct_ingest=True)
Exemplo n.º 7
0
    def test_schedule_diff_environment_in_production(self, mock_region,
                                                     mock_environment):
        """Tests that /scheduler answers 400 for a 'staging' region in 'production'."""
        mock_environment.return_value = 'production'
        controller = create_autospec(GcsfsDirectIngestController)

        region_code = 'us_nd'
        mock_region.return_value = fake_region(environment='staging',
                                               region_code=region_code,
                                               ingestor=controller)

        response = self.client.get('/scheduler',
                                   query_string={'region': region_code},
                                   headers={'X-Appengine-Cron': 'test-cron'})

        # Nothing may have been scheduled on the controller.
        schedule_next = controller.schedule_next_ingest_job_or_wait_if_necessary
        schedule_next.assert_not_called()

        self.assertEqual(400, response.status_code)
        self.assertEqual(response.get_data().decode(),
                         "Bad environment [production] for region [us_nd].")

        mock_region.assert_called_with('us_nd', is_direct_ingest=True)
Exemplo n.º 8
0
    def test_raw_data_import(self, mock_supported, mock_region,
                             mock_environment):
        """/raw_data_import answers 200 and hands the posted args to the controller."""
        mock_supported.return_value = ['us_xx']
        region_code = 'us_xx'

        mock_environment.return_value = 'staging'
        controller = create_autospec(GcsfsDirectIngestController)
        mock_region.return_value = fake_region(region_code=region_code,
                                               environment='staging',
                                               ingestor=controller)

        # Build the import args from a normalized, unprocessed raw-data path.
        normalized_path = to_normalized_unprocessed_file_path(
            'bucket/raw_data_path.csv',
            file_type=GcsfsDirectIngestFileType.RAW_DATA)
        import_args = GcsfsRawDataBQImportArgs(
            raw_data_file_path=GcsfsFilePath.from_absolute_path(
                normalized_path))

        payload = json.dumps({
            'cloud_task_args': import_args.to_serializable(),
            'args_type': 'GcsfsRawDataBQImportArgs',
        }).encode()

        response = self.client.post('/raw_data_import',
                                    query_string={'region': region_code},
                                    headers={'X-Appengine-Cron': 'test-cron'},
                                    data=payload)

        self.assertEqual(200, response.status_code)
        controller.do_raw_data_import.assert_called_with(import_args)
Exemplo n.º 9
0
    def test_start_existing_session(self, mock_purge, mock_get_sessions,
                                    mock_docket, mock_tracker,
                                    mock_create_session, mock_region,
                                    mock_supported, mock_environment):
        """Tests that /start answers 500 and starts nothing when a session is open."""
        region = 'us_ut'

        # A single already-open background session for the requested region.
        open_session = sessions.ScrapeSession.new(
            key=None,
            region=region,
            scrape_type=constants.ScrapeType.BACKGROUND,
            phase=scrape_phase.ScrapePhase.SCRAPE)
        mock_get_sessions.return_value = iter([open_session])

        scraper = create_autospec(BaseScraper)
        mock_region.return_value = fake_region(environment='production',
                                               ingestor=scraper)
        mock_environment.return_value = 'production'
        mock_supported.side_effect = _MockSupported

        scrape_type = constants.ScrapeType.BACKGROUND
        response = self.client.get(
            '/start',
            query_string={'region': region, 'scrape_type': scrape_type.value},
            headers={'X-Appengine-Cron': 'test-cron'})
        assert response.status_code == 500

        for untouched in (mock_purge, mock_create_session, mock_tracker,
                          mock_docket, mock_region):
            assert not untouched.called
        mock_supported.assert_called_with(stripes=[], timezone=None)
        scraper.start_scrape.assert_not_called()
Exemplo n.º 10
0
    def test_start_all_diff_environment(self, mock_purge, mock_get_sessions,
                                        mock_docket, mock_tracker,
                                        mock_create_session, mock_region,
                                        mock_supported, mock_environment):
        """Tests that /start for 'all' answers 400 when environments differ.

        The mocked environment is 'staging' while the fake region is
        'production'; the session, docket, tracker and purge mocks must stay
        uncalled and the scraper must not be started.
        """
        mock_environment.return_value = 'staging'
        scraper = create_autospec(BaseScraper)
        mock_region.return_value = fake_region(environment='production',
                                               ingestor=scraper)
        mock_supported.side_effect = _MockSupported

        scrape_type = constants.ScrapeType.BACKGROUND
        response = self.client.get(
            '/start',
            query_string={'region': 'all', 'scrape_type': scrape_type.value},
            headers={'X-Appengine-Cron': 'test-cron'})
        assert response.status_code == 400

        for untouched in (mock_get_sessions, mock_docket, mock_tracker,
                          mock_create_session, mock_purge):
            assert not untouched.called
        mock_region.assert_called_with('us_wy')
        scraper.start_scrape.assert_not_called()
        mock_supported.assert_called_with(stripes=[], timezone=None)
Exemplo n.º 11
0
    def test_stop_no_session(self, mock_sessions, mock_enqueue, mock_region,
                             mock_supported, client):
        """/stop for all regions answers 200 and stops nothing when no sessions exist.

        Sessions are looked up for every supported region and scrape type, but
        no scrape is stopped and nothing is enqueued.
        """
        mock_sessions.return_value = []
        mock_region.return_value = fake_region()
        mock_supported.return_value = ['us_ca', 'us_ut']

        response = client.get(
            '/stop',
            query_string={
                'region': 'all',
                'scrape_type': 'all',
                'respect_is_stoppable': 'false'
            },
            headers={'X-Appengine-Cron': "test-cron"})
        assert response.status_code == 200

        # One lookup per (region, scrape type) pair, in any order.
        expected_lookups = [
            call(ScrapeKey(code, scrape_type))
            for code in ('us_ca', 'us_ut')
            for scrape_type in (constants.ScrapeType.BACKGROUND,
                                constants.ScrapeType.SNAPSHOT)
        ]
        mock_sessions.assert_has_calls(expected_lookups, any_order=True)

        ingestor = mock_region.return_value.get_ingestor()
        assert not ingestor.stop_scrape.called
        mock_supported.assert_called_with(timezone=None)
        assert not mock_enqueue.called
Exemplo n.º 12
0
 def create_fake_region(ingest_view_exports_enabled: bool = True) -> Region:
     """Build a fake "US_XX" region for tests.

     Raw-vs-ingest file name detection and raw data BQ imports are always
     switched on; ingest view exports follow ``ingest_view_exports_enabled``.
     """
     region: Region = fake_region(
         region_code="US_XX",
         is_raw_vs_ingest_file_name_detection_enabled=True,
         are_raw_data_bq_imports_enabled_in_env=True,
         are_ingest_view_exports_enabled_in_env=ingest_view_exports_enabled,
     )
     return region
Exemplo n.º 13
0
    def test_start_all_diff_environment(
        self,
        mock_purge: Mock,
        mock_get_sessions: Mock,
        mock_docket: Mock,
        mock_tracker: Mock,
        mock_create_session: Mock,
        mock_region: Mock,
        mock_supported: Mock,
        mock_environment: Mock,
    ) -> None:
        """Tests that /start for "all" answers 400 when environments differ.

        The mocked environment is "staging" while the fake region is
        "production"; the session, docket, tracker and purge mocks must stay
        uncalled and the scraper must not be started.
        """
        mock_environment.return_value = "staging"
        scraper = create_autospec(BaseScraper)
        mock_region.return_value = fake_region(environment="production",
                                               scraper=scraper)
        mock_supported.side_effect = _MockSupported

        scrape_type = constants.ScrapeType.BACKGROUND
        response = self.client.get(
            "/start",
            query_string={"region": "all", "scrape_type": scrape_type.value},
            headers={"X-Appengine-Cron": "test-cron"})
        assert response.status_code == 400

        for untouched in (mock_get_sessions, mock_docket, mock_tracker,
                          mock_create_session, mock_purge):
            assert not untouched.called
        mock_region.assert_called_with("us_wy")
        scraper.start_scrape.assert_not_called()
        mock_supported.assert_called_with(stripes=[], timezone=None)
Exemplo n.º 14
0
    def test_stop_no_session(
        self, mock_sessions, mock_task_manager, mock_region, mock_supported
    ):
        """/stop for all regions answers 200 and stops nothing when no session exists.

        Sessions are looked up for every supported region and scrape type, but
        the scraper is never stopped and no scraper phase task is created.
        """
        mock_sessions.return_value = None
        scraper = create_autospec(BaseScraper)
        mock_region.return_value = fake_region(ingestor=scraper)
        mock_supported.return_value = ["us_ca", "us_ut"]

        response = self.client.get(
            "/stop",
            query_string={
                "region": "all",
                "scrape_type": "all",
                "respect_is_stoppable": "false",
            },
            headers={"X-Appengine-Cron": "test-cron"},
        )
        assert response.status_code == 200

        # One lookup per (region, scrape type) pair, in any order.
        expected_lookups = [
            call(ScrapeKey(code, scrape_type))
            for code in ("us_ca", "us_ut")
            for scrape_type in (
                constants.ScrapeType.BACKGROUND,
                constants.ScrapeType.SNAPSHOT,
            )
        ]
        mock_sessions.assert_has_calls(expected_lookups, any_order=True)

        scraper.stop_scrape.assert_not_called()
        mock_supported.assert_called_with(stripes=[], timezone=None)
        task_manager = mock_task_manager.return_value
        task_manager.create_scraper_phase_task.assert_not_called()
Exemplo n.º 15
0
    def test_stop_no_session(self, mock_sessions, mock_task_manager,
                             mock_region, mock_supported):
        """/stop for all regions answers 200 and stops nothing when no session exists.

        Sessions are looked up for every supported region and scrape type, but
        the scraper is never stopped and no scraper phase task is created.
        """
        mock_sessions.return_value = None
        scraper = create_autospec(BaseScraper)
        mock_region.return_value = fake_region(ingestor=scraper)
        mock_supported.return_value = ['us_ca', 'us_ut']

        response = self.client.get(
            '/stop',
            query_string={
                'region': 'all',
                'scrape_type': 'all',
                'respect_is_stoppable': 'false'
            },
            headers={'X-Appengine-Cron': 'test-cron'})
        assert response.status_code == 200

        # Lookups are checked in this exact sequence: each region in turn,
        # BACKGROUND before SNAPSHOT.
        expected_lookups = [
            call(ScrapeKey(code, scrape_type))
            for code in ('us_ca', 'us_ut')
            for scrape_type in (constants.ScrapeType.BACKGROUND,
                                constants.ScrapeType.SNAPSHOT)
        ]
        mock_sessions.assert_has_calls(expected_lookups)

        scraper.stop_scrape.assert_not_called()
        mock_supported.assert_called_with(stripes=[], timezone=None)
        task_manager = mock_task_manager.return_value
        task_manager.create_scraper_phase_task.assert_not_called()
Exemplo n.º 16
0
    def test_update_raw_data_latest_views_for_state(self, mock_updater_fn,
                                                    mock_supported,
                                                    mock_region,
                                                    mock_environment):
        """The endpoint answers 200 and updates the state's views exactly once.

        Runs with the project id overridden to 'recidiviz-staging'.
        """
        with local_project_id_override('recidiviz-staging'):
            mock_supported.return_value = ['us_xx']
            updater = create_autospec(
                DirectIngestRawDataTableLatestViewUpdater)
            mock_updater_fn.return_value = updater

            region_code = 'us_xx'
            mock_environment.return_value = 'staging'
            mock_region.return_value = fake_region(region_code=region_code,
                                                   environment='staging')

            response = self.client.post(
                '/update_raw_data_latest_views_for_state',
                query_string={'region': region_code},
                headers={'X-Appengine-Cron': 'test-cron'})

            updater.update_views_for_state.assert_called_once()
            self.assertEqual(200, response.status_code)
Exemplo n.º 17
0
    def test_start(self, mock_purge, mock_get_sessions, mock_docket,
                   mock_tracker, mock_create_session, mock_update_phase,
                   mock_region, mock_supported, mock_environment):
        """Tests that the start operation chains together the correct calls."""
        for silent_mock in (mock_purge, mock_docket, mock_tracker):
            silent_mock.return_value = None
        # No pre-existing sessions for the region.
        mock_get_sessions.return_value = iter([])
        scraper = create_autospec(BaseScraper)
        mock_region.return_value = fake_region(environment='production',
                                               ingestor=scraper)
        mock_environment.return_value = 'production'
        mock_supported.side_effect = _MockSupported

        region = 'us_ut'
        scrape_type = constants.ScrapeType.BACKGROUND
        scrape_key = ScrapeKey(region, scrape_type)
        response = self.client.get(
            '/start',
            query_string={'region': region, 'scrape_type': scrape_type.value},
            headers={'X-Appengine-Cron': 'test-cron'})
        assert response.status_code == 200

        mock_purge.assert_called_with(scrape_key, 'scraper_batch')
        mock_docket.assert_called_with(scrape_key, '', '')
        mock_tracker.assert_called_with(scrape_key)
        mock_create_session.assert_called_with(scrape_key)
        new_session = mock_create_session.return_value
        mock_update_phase.assert_called_with(new_session,
                                             scrape_phase.ScrapePhase.SCRAPE)
        mock_region.assert_called_with('us_ut')
        scraper.start_scrape.assert_called()
        mock_supported.assert_called_with(stripes=[], timezone=None)
Exemplo n.º 18
0
    def test_ingest_view_export(self, mock_supported, mock_region,
                                mock_environment):
        """/ingest_view_export answers 200 and hands the posted args to the controller."""
        mock_supported.return_value = ['us_xx']
        region_code = 'us_xx'

        mock_environment.return_value = 'staging'
        controller = create_autospec(GcsfsDirectIngestController)
        mock_region.return_value = fake_region(region_code=region_code,
                                               environment='staging',
                                               ingestor=controller)

        previous_bound = datetime.datetime(2020, 4, 29)
        export_bound = datetime.datetime(2020, 4, 30)
        export_args = GcsfsIngestViewExportArgs(
            ingest_view_name='my_ingest_view',
            upper_bound_datetime_prev=previous_bound,
            upper_bound_datetime_to_export=export_bound)

        payload = json.dumps({
            'cloud_task_args': export_args.to_serializable(),
            'args_type': 'GcsfsIngestViewExportArgs',
        }).encode()

        response = self.client.post('/ingest_view_export',
                                    query_string={'region': region_code},
                                    headers={'X-Appengine-Cron': 'test-cron'},
                                    data=payload)

        self.assertEqual(200, response.status_code)
        controller.do_ingest_view_export.assert_called_with(export_args)
Exemplo n.º 19
0
    def test_handle_new_files_no_start_ingest_in_production(
            self, mock_region, mock_environment):
        """Tests that /handle_new_files answers 200 and calls handle_new_files(False)
        for a 'staging' region in 'production', without scheduling or running
        any ingest job."""
        region_code = 'us_nd'

        mock_environment.return_value = 'production'
        controller = create_autospec(GcsfsDirectIngestController)
        mock_region.return_value = fake_region(region_code=region_code,
                                               environment='staging',
                                               ingestor=controller)

        response = self.client.get(
            '/handle_new_files',
            query_string={
                'region': region_code,
                'can_start_ingest': 'False',
            },
            headers={'X-Appengine-Cron': 'test-cron'})

        # Neither job-related controller method may have been invoked.
        schedule_next = controller.schedule_next_ingest_job_or_wait_if_necessary
        schedule_next.assert_not_called()
        run_job = controller.run_ingest_job_and_kick_scheduler_on_completion
        run_job.assert_not_called()
        controller.handle_new_files.assert_called_with(False)

        self.assertEqual(200, response.status_code)
 def test_build_throws_in_prod_region_only_launched_in_staging(
     self, ) -> None:
     """Expects the controller factory to raise for a "staging" region.

     With allow_unlaunched=False, build must raise DirectIngestError whose
     message is "Bad environment [production] for region [us_xx].".
     """
     staging_only_region = fake_region(
         region_code="us_xx",
         environment="staging",
         is_direct_ingest=True,
         region_module=templates,
     )
     get_region_stub = Mock(return_value=staging_only_region)
     with patch("recidiviz.utils.regions.get_region", get_region_stub):
         bucket_path = gcsfs_direct_ingest_bucket_for_region(
             region_code=staging_only_region.region_code,
             system_level=SystemLevel.for_region(staging_only_region),
             ingest_instance=DirectIngestInstance.PRIMARY,
         )
         with self.assertRaises(DirectIngestError) as raised:
             _ = DirectIngestControllerFactory.build(
                 ingest_bucket_path=bucket_path,
                 allow_unlaunched=False)
         self.assertEqual(
             str(raised.exception),
             "Bad environment [production] for region [us_xx].",
         )
    def test_handle_new_files_no_start_ingest_in_production(
            self, mock_region: mock.MagicMock,
            mock_environment: mock.MagicMock) -> None:
        """Tests that /handle_new_files answers 200 and calls handle_new_files(False)
        for a "staging" region in "production", without scheduling or running
        any ingest job."""
        region_code = "us_nd"

        mock_environment.return_value = "production"
        controller = create_autospec(GcsfsDirectIngestController)
        mock_region.return_value = fake_region(region_code=region_code,
                                               environment="staging",
                                               ingestor=controller)

        response = self.client.get(
            "/handle_new_files",
            query_string={
                "region": region_code,
                "can_start_ingest": "False",
            },
            headers={"X-Appengine-Cron": "test-cron"})

        # Neither job-related controller method may have been invoked.
        schedule_next = controller.schedule_next_ingest_job_or_wait_if_necessary
        schedule_next.assert_not_called()
        run_job = controller.run_ingest_job_and_kick_scheduler_on_completion
        run_job.assert_not_called()
        controller.handle_new_files.assert_called_with(False)

        self.assertEqual(200, response.status_code)
    def test_update_raw_data_latest_views_for_state(
        self,
        mock_updater_fn: mock.MagicMock,
        mock_supported: mock.MagicMock,
        mock_region: mock.MagicMock,
        mock_environment: mock.MagicMock,
    ) -> None:
        """The endpoint answers 200 and updates the state's views exactly once.

        Runs with the project id overridden to "recidiviz-staging".
        """
        with local_project_id_override("recidiviz-staging"):
            mock_supported.return_value = ["us_xx"]
            updater = create_autospec(
                DirectIngestRawDataTableLatestViewUpdater)
            mock_updater_fn.return_value = updater

            region_code = "us_xx"
            mock_environment.return_value = "staging"
            mock_region.return_value = fake_region(region_code=region_code,
                                                   environment="staging")

            response = self.client.post(
                "/update_raw_data_latest_views_for_state",
                query_string={"region": region_code},
                headers={"X-Appengine-Cron": "test-cron"},
            )

            updater.update_views_for_state.assert_called_once()
            self.assertEqual(200, response.status_code)
    def test_handle_file_start_ingest(
            self, mock_region: mock.MagicMock,
            mock_environment: mock.MagicMock) -> None:
        """Forwards start_ingest="True" to the controller as handle_file(path, True).

        Both the mocked environment and the fake region are "production".
        """
        region_code = "us_nd"

        mock_environment.return_value = "production"
        controller = create_autospec(GcsfsDirectIngestController)
        mock_region.return_value = fake_region(region_code=region_code,
                                               environment="production",
                                               ingestor=controller)
        file_path = GcsfsFilePath.from_absolute_path(
            "bucket-us-nd/elite_offenders.csv")

        response = self.client.get(
            "/handle_direct_ingest_file",
            query_string={
                "region": region_code,
                "bucket": file_path.bucket_name,
                "relative_file_path": file_path.blob_name,
                "start_ingest": "True",
            },
            headers={"X-Appengine-Cron": "test-cron"})

        controller.handle_file.assert_called_with(file_path, True)

        # The endpoint is expected to complete without an error status.
        self.assertEqual(200, response.status_code)
    def test_handle_file_start_ingest_unsupported_region(
            self, mock_region: mock.MagicMock,
            mock_environment: mock.MagicMock) -> None:
        """Handles a file for a "staging" region while the environment is "production".

        The request carries start_ingest="False"; the controller must receive
        handle_file(path, False) and the endpoint must answer 200.
        """
        region_code = "us_nd"

        mock_environment.return_value = "production"
        controller = create_autospec(GcsfsDirectIngestController)
        mock_region.return_value = fake_region(region_code=region_code,
                                               environment="staging",
                                               ingestor=controller)

        file_path = GcsfsFilePath.from_absolute_path(
            "bucket-us-nd/elite_offenders.csv")

        response = self.client.get(
            "/handle_direct_ingest_file",
            query_string={
                "region": region_code,
                "bucket": file_path.bucket_name,
                "relative_file_path": file_path.blob_name,
                "start_ingest": "False",
            },
            headers={"X-Appengine-Cron": "test-cron"})

        mock_region.assert_called_with("us_nd", is_direct_ingest=True)
        controller.handle_file.assert_called_with(file_path, False)

        # No crash is expected for the unsupported region: the controller is
        # responsible for not starting ingest, and should it start one by
        # accident, the schedule/process_job endpoints perform the
        # unlaunched-region check themselves.
        self.assertEqual(200, response.status_code)
    def test_schedule_unlaunched_region(
        self,
        mock_supported: mock.MagicMock,
        mock_region: mock.MagicMock,
        mock_environment: mock.MagicMock,
    ) -> None:
        """/scheduler answers 400 for a "staging" region when running in "production"."""
        mock_supported.return_value = ["us_nd", "us_pa"]
        region_code = "us_nd"

        mock_environment.return_value = "production"
        mock_region.return_value = fake_region(
            region_code=region_code,
            environment="staging",
            ingestor=create_autospec(GcsfsDirectIngestController))

        response = self.client.get(
            "/scheduler",
            query_string={"region": "us_nd", "just_finished_job": "False"},
            headers={"X-Appengine-Cron": "test-cron"})

        # Both the status code and the exact error text are part of the check.
        self.assertEqual(400, response.status_code)
        self.assertEqual(
            response.get_data().decode(),
            "Bad environment [production] for region [us_nd].",
        )
    def test_schedule_diff_environment_in_production(
            self, mock_region: mock.MagicMock,
            mock_environment: mock.MagicMock) -> None:
        """Tests that /scheduler answers 400 for a "staging" region in "production"."""
        mock_environment.return_value = "production"
        controller = create_autospec(GcsfsDirectIngestController)

        region_code = "us_nd"
        mock_region.return_value = fake_region(environment="staging",
                                               region_code=region_code,
                                               ingestor=controller)

        response = self.client.get("/scheduler",
                                   query_string={"region": region_code},
                                   headers={"X-Appengine-Cron": "test-cron"})

        # Nothing may have been scheduled on the controller.
        schedule_next = controller.schedule_next_ingest_job_or_wait_if_necessary
        schedule_next.assert_not_called()

        self.assertEqual(400, response.status_code)
        self.assertEqual(
            response.get_data().decode(),
            "Bad environment [production] for region [us_nd].",
        )

        mock_region.assert_called_with("us_nd", is_direct_ingest=True)
    def test_handle_sftp_files(
        self,
        mock_get_region: mock.MagicMock,
        mock_environment: mock.MagicMock,
        mock_cloud_task_manager: mock.MagicMock,
        _mock_cloud_tasks_client: mock.MagicMock,
    ) -> None:
        """Checks that /handle_sftp_files returns 200 for a staging region in staging.

        Region lookups are served from a one-entry table containing "us_id";
        the cloud task manager is replaced with an autospec'd implementation.
        """
        staging_region = fake_region(
            region_code="us_id",
            environment="staging",
            ingestor=Mock(),
        )
        regions_by_code = {"us_id": staging_region}

        # Unknown region codes resolve to None rather than raising.
        mock_get_region.side_effect = (
            lambda region_code, is_direct_ingest: regions_by_code.get(region_code)
        )
        mock_environment.return_value = "staging"

        task_manager = create_autospec(DirectIngestCloudTaskManagerImpl)
        mock_cloud_task_manager.return_value = task_manager

        response = self.client.get(
            "/handle_sftp_files",
            query_string={"region": "us_id"},
            headers={"X-Appengine-Cron": "test-cron"},
        )
        self.assertEqual(200, response.status_code)
 def test_get_referencing_views(
     self,
     mock_normalized_config_fn: MagicMock,
     mock_unnormalized_config_fn: MagicMock,
 ) -> None:
     """Checks the tag -> referencing-views mapping built by the generator.

     Both raw-file-config lookups are stubbed with a fake "US_XX" config, and
     a fake ingest view collector is built with tags tagA, tagB and tagC.
     """
     # Each patched lookup gets its own fake config instance.
     for config_fn in (mock_normalized_config_fn, mock_unnormalized_config_fn):
         config_fn.return_value = FakeDirectIngestRegionRawFileConfig("US_XX")

     generator = DirectIngestDocumentationGenerator()
     collector = FakeDirectIngestPreProcessedIngestViewCollector(
         region=fake_region(),
         controller_tag_rank_list=["tagA", "tagB", "tagC"],
     )

     actual_referencing_views = generator.get_referencing_views(collector)

     self.assertEqual(
         actual_referencing_views,
         {
             "tagA": ["tagA", "gatedTagNotInTagsList"],
             "tagB": ["tagB", "gatedTagNotInTagsList"],
             "tagC": ["tagC"],
         },
     )
    def test_kick_all_schedulers_ignores_unlaunched_environments(
        self,
        mock_get_region: mock.MagicMock,
        mock_environment: mock.MagicMock,
        mock_supported_region_codes: mock.MagicMock,
    ) -> None:
        """Checks that kick_all_schedulers() skips regions not launched here.

        Two regions are supported: "us_mo" (environment "staging") and
        "us_nd" (environment "production"). With the current environment
        mocked to "production", only the production region's ingestor is
        expected to have kick_scheduler() called.
        """

        fake_supported_regions = {
            "us_mo":
            fake_region(region_code="us_mo",
                        environment="staging",
                        ingestor=Mock()),
            "us_nd":
            fake_region(region_code="us_nd",
                        environment="production",
                        ingestor=Mock()),
        }

        mock_cloud_task_manager = create_autospec(DirectIngestCloudTaskManager)
        for region in fake_supported_regions.values():
            # Assigning __class__ on a Mock changes what isinstance() reports;
            # presumably the code under test checks the controller type.
            region.get_ingestor().__class__ = GcsfsDirectIngestController
            region.get_ingestor().cloud_task_manager.return_value = (
                mock_cloud_task_manager)

        def fake_get_region(region_code: str,
                            is_direct_ingest: bool) -> Region:
            """Serves regions from the fake table; fails on non-direct-ingest lookups."""
            if not is_direct_ingest:
                self.fail("is_direct_ingest is False")

            return fake_supported_regions[region_code]

        mock_get_region.side_effect = fake_get_region

        mock_supported_region_codes.return_value = fake_supported_regions.keys(
        )

        mock_environment.return_value = "production"

        kick_all_schedulers()

        mock_supported_region_codes.assert_called()
        for region in fake_supported_regions.values():
            ingestor = region.get_ingestor()
            if region.environment == "staging":
                # Must name the real method: a plain Mock auto-creates any
                # attribute, so asserting on a misspelled name (previously
                # `kick_all_scheduler`) can never fail.
                ingestor.kick_scheduler.assert_not_called()
            else:
                ingestor.kick_scheduler.assert_called_once()
Exemplo n.º 30
0
    def test_stop_respects_region_is_not_stoppable(
        self,
        mock_sessions,
        mock_close,
        mock_phase,
        mock_task_manager,
        mock_region,
        mock_supported,
    ):
        """Exercises /stop for all regions when the region has is_stoppable=False.

        Verifies the session lookups, the phase updates to PERSIST, the
        stop_scrape calls recorded on the scraper, and the /read_and_persist
        tasks created for the supported regions "us_ca" and "us_ut".

        NOTE(review): the test name suggests non-stoppable regions are left
        alone, yet the expectations below include stop_scrape calls for every
        region and scrape type - confirm this matches the endpoint's intent.
        """
        background = constants.ScrapeType.BACKGROUND
        snapshot = constants.ScrapeType.SNAPSHOT
        region_codes = ("us_ca", "us_ut")

        open_session = sessions.ScrapeSession.new(
            key=None,
            region="us_xx",
            scrape_type=background,
            phase=scrape_phase.ScrapePhase.SCRAPE,
        )
        mock_sessions.return_value = open_session
        mock_close.return_value = [open_session]

        scraper = create_autospec(BaseScraper)
        unstoppable_region = fake_region(ingestor=scraper)
        mock_region.return_value = unstoppable_region
        unstoppable_region.is_stoppable = False
        mock_supported.return_value = list(region_codes)

        response = self.client.get(
            "/stop",
            query_string={"region": "all", "scrape_type": "all"},
            headers={"X-Appengine-Cron": "test-cron"},
        )
        assert response.status_code == 200

        # One session lookup per (region, scrape type), regions outermost.
        mock_sessions.assert_has_calls(
            [
                call(ScrapeKey(code, scrape_type))
                for code in region_codes
                for scrape_type in (background, snapshot)
            ]
        )
        mock_phase.assert_has_calls(
            [call(open_session, scrape_phase.ScrapePhase.PERSIST) for _ in range(4)]
        )

        # Each stop_scrape() result is also truth-tested, which the mock
        # records as a `call().__bool__()` entry after the call itself.
        stop_calls_per_region = [
            call(background, None),
            call().__bool__(),
            call(snapshot, None),
            call().__bool__(),
        ]
        assert scraper.stop_scrape.mock_calls == (
            stop_calls_per_region * len(region_codes)
        )

        mock_supported.assert_called_with(stripes=[], timezone=None)
        mock_task_manager.return_value.create_scraper_phase_task.assert_has_calls(
            [call(region_code=code, url="/read_and_persist") for code in region_codes],
            any_order=True,
        )