Exemplo n.º 1
0
    def testWalk(self):
        """Check that files written to an S3 bucket can be discovered.

        Exercises ``ButlerURI.findFileResources`` for flat listings,
        listings grouped by directory, regex-filtered listings, and a
        listing with more than 1000 files in a single directory.
        """

        def bucket_uri(key):
            # Build a fresh ButlerURI for the given key in the test bucket.
            return ButlerURI(self.makeS3Uri(key))

        # Populate the bucket; the payload written is irrelevant.
        keys = ("a/x.txt", "a/y.txt", "a/z.json", "a/b/w.txt",
                "a/b/c/d/v.json")
        expected_uris = [bucket_uri(key) for key in keys]
        for target in expected_uris:
            target.write("123".encode())

        # Everything below a/ as one flat collection.
        flat_paths = {
            res.path
            for res in ButlerURI.findFileResources([bucket_uri("a/")])
        }
        self.assertEqual(flat_paths, {res.path for res in expected_uris})

        # Everything below a/, one group per folder.
        groups = ButlerURI.findFileResources([bucket_uri("a/")],
                                             grouped=True)
        expected_groups = (
            ("/a/x.txt", "/a/y.txt", "/a/z.json"),
            ("/a/b/w.txt", ),
            ("/a/b/c/d/v.json", ),
        )
        for group, wanted in zip(groups, expected_groups):
            self.assertEqual(tuple(res.path for res in group), wanted)

        # Restrict the flat listing to JSON files.
        json_paths = {
            res.path
            for res in ButlerURI.findFileResources([bucket_uri("a/")],
                                                   file_filter=r"\.json$")
        }
        wanted_json = {
            res.path
            for res in expected_uris if res.path.endswith(".json")
        }
        self.assertEqual(json_paths, wanted_json)

        # Restrict the grouped listing to JSON files.
        groups = ButlerURI.findFileResources([bucket_uri("a/")],
                                             file_filter=r"\.json$",
                                             grouped=True)
        expected_groups = (("/a/z.json", ), ("/a/b/c/d/v.json", ))
        for group, wanted in zip(groups, expected_groups):
            self.assertEqual(tuple(res.path for res in group), wanted)

        # Check pagination works with large numbers of files. The S3 API
        # limits us to 1000 responses per list_objects call, so create
        # more files than that in one directory.
        created = set()
        n_dir1 = 1100
        for index in range(1, n_dir1 + 1):
            new = bucket_uri(f"test/file{index:04d}.txt")
            new.write(f"{index}".encode())
            created.add(str(new))

        # Add a subdirectory so that the search has a hierarchy to
        # descend into.
        n_dir2 = 100
        for index in range(1, n_dir2 + 1):
            new = bucket_uri(f"test/subdir/file{index:04d}.txt")
            new.write(f"{index}".encode())
            created.add(str(new))

        listing = ButlerURI.findFileResources([bucket_uri("test/")])
        self.assertEqual({str(res) for res in listing}, created)

        # Same tree again, this time grouped by directory.
        grouped_listing = list(
            ButlerURI.findFileResources([bucket_uri("test/")],
                                        grouped=True))
        self.assertEqual(len(grouped_listing), 2)
        top_level = list(grouped_listing[0])
        nested = list(grouped_listing[1])
        self.assertEqual(len(top_level), n_dir1)
        self.assertEqual(len(nested), n_dir2)
Exemplo n.º 2
0
 def find_files(root: str) -> List[ButlerURI]:
     """Return the file resources found under ``root`` as a list.

     ``root`` is passed to ``ButlerURI.findFileResources`` as the only
     candidate, and everything that call yields is collected.
     """
     candidates = [root]
     return [*ButlerURI.findFileResources(candidates)]
Exemplo n.º 3
0
    def testWalk(self):
        """Test file discovery in the local test directory tree.

        Results of ``ButlerURI.findFileResources`` are compared against
        ``glob`` for plain, filtered, multi-root and grouped searches.
        Finally, ``walk()`` on the URI built from a file path must raise
        ``ValueError``.
        """
        test_dir_uri = ButlerURI(TESTDIR)
        config_dir = os.path.join(TESTDIR, "config")
        yaml_filter = r".*\.yaml$"

        # A single file given as input is returned as itself.
        file = test_dir_uri.join("config/basic/butler.yaml")
        single = list(ButlerURI.findFileResources([file]))
        self.assertEqual(single[0], file)

        # Every regular file under config/, compared as full local paths.
        on_disk = {
            path
            for path in glob.glob(os.path.join(config_dir, "**"),
                                  recursive=True)
            if os.path.isfile(path)
        }
        discovered = {
            res.ospath
            for res in ButlerURI.findFileResources(
                [test_dir_uri.join("config")])
        }
        self.assertEqual(discovered, on_disk)

        # Restrict the search to YAML files.
        expected_yaml = set(
            glob.glob(os.path.join(config_dir, "**", "*.yaml"),
                      recursive=True))
        discovered = {
            res.ospath
            for res in ButlerURI.findFileResources(
                [test_dir_uri.join("config")], file_filter=yaml_filter)
        }
        self.assertEqual(discovered, expected_yaml)

        # Two explicit directories plus a single file.
        basic_yaml = glob.glob(
            os.path.join(config_dir, "**", "basic", "*.yaml"),
            recursive=True)
        templates_yaml = glob.glob(
            os.path.join(config_dir, "**", "templates", "*.yaml"),
            recursive=True)
        wanted = set(basic_yaml) | set(templates_yaml) | {file.ospath}

        search_roots = [
            file,
            test_dir_uri.join("config/basic"),
            test_dir_uri.join("config/templates"),
        ]
        discovered = {
            res.ospath
            for res in ButlerURI.findFileResources(search_roots,
                                                   file_filter=yaml_filter)
        }
        self.assertEqual(discovered, wanted)

        # Group by directory: the union of all groups must equal the full
        # YAML set, and exactly 9 of the groups are expected to be
        # non-empty (one per directory that contains YAML files).
        per_group = [
            {res.ospath for res in group}
            for group in ButlerURI.findFileResources(
                [file, test_dir_uri.join("config/")],
                file_filter=yaml_filter,
                grouped=True)
        ]
        found_yaml = set().union(*per_group)
        non_empty = sum(1 for paths in per_group if paths)

        self.assertEqual(found_yaml, expected_yaml)
        self.assertEqual(non_empty, 9)

        # With grouping, files given individually are expected to be
        # returned together in one final group.
        file2 = test_dir_uri.join("config/templates/templates-bad.yaml")
        grouped = list(
            ButlerURI.findFileResources(
                [file, file2, test_dir_uri.join("config/dbAuth")],
                grouped=True))
        self.assertEqual(len(grouped), 2)
        self.assertEqual(list(grouped[1]), [file, file2])

        # Walking the URI built from a file path must be rejected.
        with self.assertRaises(ValueError):
            list(file.walk())