def test_do_upload_graceful_failures(self, mock_fs_factory: Mock) -> None:
     """Check that do_upload() separates uploadable paths from missing ones.

     Only ``raw_data/test_file.txt`` is registered with the fake filesystem,
     while the controller is handed that path plus one that was never added.
     The first element returned by do_upload() is expected to contain the
     registered path and the second element the unregistered one.
     """
     existing_path = "test-project-direct-ingest-state-us-xx/raw_data/test_file.txt"
     missing_path = (
         "test-project-direct-ingest-state-us-xx/raw_data/non_existent_file.txt"
     )

     fake_fs = FakeGCSFileSystem()
     registered_file = GcsfsFilePath.from_bucket_and_blob_name(
         "test-project-direct-ingest-state-us-xx", "raw_data/test_file.txt"
     )
     fake_fs.test_add_path(path=registered_file, local_path=None)
     # Set before constructing the controller; presumably the controller
     # obtains its filesystem from this factory - confirm against the patch
     # decorator, which is not visible here.
     mock_fs_factory.return_value = fake_fs

     controller = UploadStateFilesToIngestBucketController(
         paths_with_timestamps=[(existing_path, TODAY), (missing_path, TODAY)],
         project_id="test-project",
         region="us_xx",
     )

     succeeded, failed = controller.do_upload()

     self.assertEqual(succeeded, [existing_path])
     self.assertEqual(failed, [missing_path])
Exemplo n.º 2
0
 def test_do_upload_succeeds(self, mock_fs_factory: Mock) -> None:
     """Check the happy path of do_upload() for a single registered file.

     Expects the one path in ``successes``, no failures, no skipped files on
     the controller, and ``self.us_xx_manager`` reporting that the instance
     is not paused afterwards.
     """
     bucket_name = "recidiviz-456-direct-ingest-state-us-xx"
     file_path = "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt"

     fake_fs = FakeGCSFileSystem()
     registered_file = GcsfsFilePath.from_bucket_and_blob_name(
         bucket_name, "raw_data/test_file.txt"
     )
     fake_fs.test_add_path(path=registered_file, local_path=None)
     mock_fs_factory.return_value = fake_fs

     controller = UploadStateFilesToIngestBucketController(
         paths_with_timestamps=[(file_path, TODAY)],
         project_id="recidiviz-456",
         region="us_xx",
     )

     upload_result: MultiRequestResultWithSkipped[
         str, str, str
     ] = controller.do_upload()

     self.assertEqual(upload_result.successes, [file_path])
     self.assertEqual(len(upload_result.failures), 0)
     self.assertEqual(len(controller.skipped_files), 0)
     self.assertFalse(self.us_xx_manager.is_instance_paused())
Exemplo n.º 3
0
 def test_do_upload_sets_correct_content_type(
     self,
     mock_fs_factory: Mock,
 ) -> None:
     """Check the content types recorded after uploading a .txt and a .csv.

     Both files are registered with the fake filesystem and uploaded. The
     test then reads ``content_type`` from every entry in ``fake_fs.files``
     and expects ``text/plain`` followed by ``text/csv``.
     """
     bucket_name = "recidiviz-456-direct-ingest-state-us-xx"
     upload_paths = [
         "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt",
         "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.csv",
     ]

     fake_fs = FakeGCSFileSystem()
     # Registration order matters: the content-type assertion below compares
     # against an ordered list.
     for blob_name in ("raw_data/test_file.txt", "raw_data/test_file.csv"):
         fake_fs.test_add_path(
             path=GcsfsFilePath.from_bucket_and_blob_name(bucket_name, blob_name),
             local_path=None,
         )
     mock_fs_factory.return_value = fake_fs

     controller = UploadStateFilesToIngestBucketController(
         paths_with_timestamps=[(upload_path, TODAY) for upload_path in upload_paths],
         project_id="recidiviz-456",
         region="us_xx",
     )

     upload_result: MultiRequestResultWithSkipped[
         str, str, str
     ] = controller.do_upload()

     self.assertListEqual(upload_result.successes, upload_paths)

     # Assumes `files` iterates in insertion order - confirm against
     # FakeGCSFileSystem if this ever becomes flaky.
     observed_content_types = [
         stored.content_type for stored in fake_fs.files.values()
     ]
     self.assertListEqual(observed_content_types, ["text/plain", "text/csv"])
     self.assertFalse(self.us_xx_manager.is_instance_paused())
Exemplo n.º 4
0
 def test_get_paths_to_upload_is_correct(
     self,
     mock_fs_factory: Mock,
 ) -> None:
     """Check get_paths_to_upload() when given a directory-style path.

     The fake filesystem holds a file directly under ``raw_data/``, a file
     under ``raw_data/subdir1/`` and a directory entry ``raw_data/subdir2/``.
     The controller is given only ``.../raw_data/``; the expected result is
     the two file paths, each paired with TODAY, with no entry for subdir2.
     """
     bucket_name = "recidiviz-456-direct-ingest-state-us-xx"

     fake_fs = FakeGCSFileSystem()
     top_level_file = GcsfsFilePath.from_bucket_and_blob_name(
         bucket_name, "raw_data/test_file.txt"
     )
     fake_fs.test_add_path(path=top_level_file, local_path=None)

     nested_file = GcsfsFilePath.from_bucket_and_blob_name(
         bucket_name, "raw_data/subdir1/test_file.txt"
     )
     fake_fs.test_add_path(path=nested_file, local_path=None)

     # A directory entry rather than a file; the expected list has no entry for it.
     bare_directory = GcsfsDirectoryPath.from_bucket_and_blob_name(
         bucket_name, "raw_data/subdir2/"
     )
     fake_fs.test_add_path(path=bare_directory, local_path=None)

     mock_fs_factory.return_value = fake_fs

     controller = UploadStateFilesToIngestBucketController(
         paths_with_timestamps=[
             ("recidiviz-456-direct-ingest-state-us-xx/raw_data/", TODAY),
         ],
         project_id="recidiviz-456",
         region="us_xx",
     )

     expected_paths = [
         (
             "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt",
             TODAY,
         ),
         (
             "recidiviz-456-direct-ingest-state-us-xx/raw_data/subdir1/test_file.txt",
             TODAY,
         ),
     ]
     self.assertListEqual(expected_paths, controller.get_paths_to_upload())
     self.assertFalse(self.us_xx_manager.is_instance_paused())
Exemplo n.º 5
0
    def ls_with_blob_prefix(
        self, bucket_name: str, blob_prefix: str
    ) -> List[Union[GcsfsDirectoryPath, GcsfsFilePath]]:
        """Return the stored paths whose absolute path begins with the prefix.

        The prefix is built from ``bucket_name`` and ``blob_prefix`` and then
        compared, as a plain string prefix, against every key of
        ``self.files``. Matching entries contribute their ``gcs_path``.

        Args:
            bucket_name: Bucket portion of the prefix.
            blob_prefix: Blob-name portion of the prefix.

        Returns:
            The ``gcs_path`` of each matching entry, in the iteration order
            of ``self.files``.
        """
        prefix_path = GcsfsFilePath.from_bucket_and_blob_name(bucket_name, blob_prefix)
        # Scan the file table while holding the mutex.
        with self.mutex:
            matches: List[Union[GcsfsDirectoryPath, GcsfsFilePath]] = [
                stored.gcs_path
                for stored_abs_path, stored in self.files.items()
                if stored_abs_path.startswith(prefix_path.abs_path())
            ]
        return matches
 def test_do_upload_sets_correct_content_type(self, mock_fs_factory: Mock) -> None:
     """Check the content types recorded after uploading a .txt and a .csv.

     Both files are registered with the fake filesystem and uploaded. The
     first element returned by do_upload() must list both paths, and the
     ``content_type`` values read from ``fake_fs.files`` must be
     ``text/plain`` followed by ``text/csv``.
     """
     bucket_name = "test-project-direct-ingest-state-us-xx"
     upload_paths = [
         "test-project-direct-ingest-state-us-xx/raw_data/test_file.txt",
         "test-project-direct-ingest-state-us-xx/raw_data/test_file.csv",
     ]

     fake_fs = FakeGCSFileSystem()
     # Registration order matters: the content-type assertion below compares
     # against an ordered list.
     for blob_name in ("raw_data/test_file.txt", "raw_data/test_file.csv"):
         fake_fs.test_add_path(
             path=GcsfsFilePath.from_bucket_and_blob_name(bucket_name, blob_name),
             local_path=None,
         )
     mock_fs_factory.return_value = fake_fs

     controller = UploadStateFilesToIngestBucketController(
         paths_with_timestamps=[(upload_path, TODAY) for upload_path in upload_paths],
         project_id="test-project",
         region="us_xx",
     )

     # Only the first element of the returned pair is checked here.
     succeeded, _ = controller.do_upload()
     self.assertListEqual(succeeded, upload_paths)

     # Assumes `files` iterates in insertion order - confirm against
     # FakeGCSFileSystem if this ever becomes flaky.
     observed_content_types = [
         stored.content_type for stored in fake_fs.files.values()
     ]
     self.assertListEqual(observed_content_types, ["text/plain", "text/csv"])
Exemplo n.º 7
0
    def test_skip_already_processed_or_discovered_files(
        self,
        mock_fs_factory: Mock,
    ) -> None:
        """Check that do_upload() reports some requested files as skipped.

        Four files are registered and requested. The result is expected to
        list ``test_file.txt`` and ``test_file.csv`` under ``successes`` and
        ``skipped.csv`` and ``discovered.csv`` under ``skipped``.

        NOTE(review): whatever marks the last two files as already processed
        or discovered is not visible in this method - presumably fixture
        setup elsewhere in the test class; confirm there.
        """
        bucket_name = "recidiviz-456-direct-ingest-state-us-xx"
        expected_uploaded = [
            "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.txt",
            "recidiviz-456-direct-ingest-state-us-xx/raw_data/test_file.csv",
        ]
        expected_skipped = [
            "recidiviz-456-direct-ingest-state-us-xx/raw_data/skipped.csv",
            "recidiviz-456-direct-ingest-state-us-xx/raw_data/discovered.csv",
        ]

        fake_fs = FakeGCSFileSystem()
        blob_names = (
            "raw_data/test_file.txt",
            "raw_data/test_file.csv",
            "raw_data/skipped.csv",
            "raw_data/discovered.csv",
        )
        for blob_name in blob_names:
            fake_fs.test_add_path(
                path=GcsfsFilePath.from_bucket_and_blob_name(bucket_name, blob_name),
                local_path=None,
            )
        mock_fs_factory.return_value = fake_fs

        # Request all four files, uploads first and then the skipped ones.
        requested = [
            (requested_path, TODAY)
            for requested_path in expected_uploaded + expected_skipped
        ]
        controller = UploadStateFilesToIngestBucketController(
            paths_with_timestamps=requested,
            project_id="recidiviz-456",
            region="us_xx",
        )

        upload_result: MultiRequestResultWithSkipped[
            str, str, str
        ] = controller.do_upload()

        self.assertListEqual(upload_result.successes, expected_uploaded)
        self.assertListEqual(upload_result.skipped, expected_skipped)
        self.assertFalse(self.us_xx_manager.is_instance_paused())