Пример #1
0
    def test_create_file(self):
        """Check that _create_file() writes files of the requested size.

        The first file is created without a parent and must bring the
        dataset root into existence; the following files are requested
        under 'goob' and their returned relative paths must start with
        that directory.
        """
        spec = {'totalfiles': 10, 'totalsize': 0}
        maker = mkdata.DatasetMaker(self.dsdir, spec)
        # constructing the maker must not touch the file system
        self.assertFalse(os.path.exists(self.dsdir))

        relpath = maker._create_file(82)
        self.assertTrue(os.path.exists(self.dsdir))
        fullpath = os.path.join(self.dsdir, relpath)
        self.assertTrue(os.path.exists(fullpath))
        self.assertEqual(os.stat(fullpath).st_size, 82)

        # one large and one small file, both inside the same subdirectory
        prefix = "goob" + os.sep
        for nbytes in (8200841, 411):
            relpath = maker._create_file(nbytes, under='goob')
            self.assertTrue(relpath.startswith(prefix),
                            "file not created under goob/")
            fullpath = os.path.join(self.dsdir, relpath)
            self.assertTrue(os.path.exists(fullpath))
            self.assertEqual(os.stat(fullpath).st_size, nbytes)
Пример #2
0
    def test_fill_dir_1(self):
        """Check _fill_dir() with file specifications only (no directories).

        A budget of 1500 bytes and 14 files is requested.  The expected
        listing is four 25-byte files, four 50-byte files and six
        200-byte files, i.e. the empty spec is expected to account for
        whatever the two explicit specs leave over.
        """
        maker = mkdata.DatasetMaker(self.dsdir, {
            'totalfiles': 10,
            'totalsize': 0
        })
        self.assertFalse(os.path.exists(self.dsdir))

        uniform = {'totalsize': 100, 'totalfiles': 4}
        repeated = {'totalsize': 50, 'reps': 4, 'totalfiles': 1}
        unconstrained = {}
        file_specs = [uniform, repeated, unconstrained]

        maker._fill_dir('', 1500, 14, file_specs)
        self.assertTrue(os.path.exists(self.dsdir))

        names = os.listdir(self.dsdir)
        self.assertEqual(
            len(names), 14,
            "Wrong number of files: expected 14; got " + str(names))

        sizes = [
            os.stat(os.path.join(self.dsdir, name)).st_size for name in names
        ]
        sizes.sort()
        self.assertEqual(sizes, [25] * 4 + [50] * 4 + [200] * 6)
        self.assertEqual(sum(sizes), 1500)
Пример #3
0
    def test_convert_new(self):
        """Generate a small dataset, bag it, and convert it to a multibag.

        The 'multibag' subdirectory must be absent after plain bagging
        and present after convert(); the result is then passed to
        valid8.validate_headbag().
        """
        # create the data
        self.bagdir = os.path.join(self.tempdir, "sampledata")
        self.assertFalse(os.path.isdir(self.bagdir))
        spec = {
            'totalsize': 15,
            'totalfiles': 3,
            'files': [{'totalsize': 10, 'totalfiles': 2}],
            'dirs': [{'totalsize': 5, 'totalfiles': 1}],
        }
        mkdata.DatasetMaker(self.bagdir, spec).fill()
        self.assertTrue(os.path.isdir(self.bagdir))

        # turn it into a bag
        self.assertTrue(bagit.make_bag(self.bagdir).validate())

        multibag_dir = os.path.join(self.bagdir, 'multibag')
        self.assertFalse(os.path.exists(multibag_dir))

        # convert it to a multibag
        self.mkr = amend.SingleMultibagMaker(self.bagdir)
        self.mkr.convert("1.5", "doi:XXXX/11111")
        self.assertTrue(os.path.exists(multibag_dir))

        # validate it as a head bag
        valid8.validate_headbag(self.bagdir)
Пример #4
0
    def test_fill_dir_2(self):
        """Check _fill_dir() with both file and directory specifications.

        With a budget of 2000 bytes and 19 files, the root is expected
        to hold 11 files (900 bytes in total) and two directories whose
        names end in '_d': one holding three files of 166/167/167 bytes
        and one holding five 120-byte files.
        """
        maker = mkdata.DatasetMaker(self.dsdir, {
            'totalfiles': 10,
            'totalsize': 0
        })
        self.assertFalse(os.path.exists(self.dsdir))

        file_specs = [
            {'totalsize': 100, 'totalfiles': 4},
            {'totalsize': 50, 'reps': 4, 'totalfiles': 1},
            {},
        ]
        dir_specs = [{'totalsize': 500}, {'totalfiles': 5}]

        maker._fill_dir('', 2000, 19, file_specs, dir_specs)
        self.assertTrue(os.path.exists(self.dsdir))

        entries = os.listdir(self.dsdir)
        self.assertEqual(
            len(entries), 13,
            "Wrong number of file/dirs: expected 13; got " + str(entries))

        # split the listing into generated directories and plain files
        subdirs, plain = [], []
        for name in entries:
            (subdirs if name.endswith('_d') else plain).append(name)

        self.assertEqual(len(subdirs), 2)
        really_dirs = [
            os.path.isdir(os.path.join(self.dsdir, d)) for d in subdirs
        ]
        self.assertTrue(
            all(really_dirs),
            "Not all directories are really directories: " + str(subdirs))

        sizes = sorted(
            os.stat(os.path.join(self.dsdir, name)).st_size for name in plain)
        self.assertEqual(sizes, [25] * 4 + [50] * 4 + [200] * 3)
        self.assertEqual(sum(sizes), 900)

        def members(subdir):
            # paths of the subdirectory's contents, relative to the root
            return [
                os.path.join(subdir, name)
                for name in os.listdir(os.path.join(self.dsdir, subdir))
            ]

        first = members(subdirs[0])
        second = members(subdirs[1])
        # listing order is arbitrary: make 'second' the one whose file
        # names carry the '_120' suffix
        if first[0].endswith('_120'):
            first, second = second, first

        self.assertEqual(len(first), 3)
        sizes = sorted(
            os.stat(os.path.join(self.dsdir, p)).st_size for p in first)
        self.assertEqual(sizes, [166, 167, 167])

        self.assertEqual(len(second), 5)
        sizes = sorted(
            os.stat(os.path.join(self.dsdir, p)).st_size for p in second)
        self.assertEqual(sizes, [120] * 5)
Пример #5
0
 def test_ensure_root(self):
     """Check that ensure_root() creates the root and can be called again."""
     maker = mkdata.DatasetMaker(self.dsdir, {'totalfiles': 10, 'totalsize': 0})
     self.assertFalse(os.path.exists(self.dsdir))

     # The second pass calls ensure_root() when the directory already
     # exists; it must leave the directory in place.
     for _ in range(2):
         maker.ensure_root()
         self.assertTrue(os.path.exists(self.dsdir))
Пример #6
0
 def test_mkfid(self):
     """Check that consecutive _mkfid() results differ from each other.

     Ten identifiers are generated for the same size (100); each one is
     compared with its successor, the last one wrapping around to the
     first.
     """
     maker = mkdata.DatasetMaker(self.dsdir, {'totalfiles': 10, 'totalsize': 0})
     ids = [maker._mkfid(100) for _ in range(10)]

     # pair every id with the one generated right after it (cyclically)
     successors = ids[1:] + ids[:1]
     for current, following in zip(ids, successors):
         self.assertNotEqual(current, following)
Пример #7
0
    def test_mkfilename(self):
        """Check that _mkfilename() yields usable paths on repeated calls.

        Each returned path is materialised as a one-byte file under the
        maker's root before the next name is requested; afterwards all
        100 files must exist.
        """
        maker = mkdata.DatasetMaker(self.dsdir, {
            'totalfiles': 10,
            'totalsize': 0
        })
        maker.ensure_root()

        def decorate(fn):
            # name-building callback handed to _mkfilename()
            return "_{0}_".format(fn)

        created = []
        for _ in range(100):
            relpath = maker._mkfilename(decorate, '')
            with open(os.path.join(maker.root, relpath), 'w') as out:
                out.write('\n')
            created.append(relpath)

        for relpath in created:
            present = os.path.exists(os.path.join(self.dsdir, relpath))
            self.assertTrue(present, "failed to create " + relpath)
Пример #8
0
def mkbag(dsdir):
    """Create a sample bag at ``dsdir`` with a parallel metadata tree.

    Steps, in order:

    1. Fill ``dsdir`` with generated files according to a fixed
       specification (22 files, 1014600 bytes: six files at the top and
       two identical directories, each with three files plus a nested
       directory of five files).
    2. Turn the directory into a bag with ``bagit.make_bag()``.
    3. For every file found under ``data/``, create a 16-byte file at the
       same relative path under ``metadata/`` and append its checksum
       entry to ``tagmanifest-sha256.txt``.
    4. Append a ``Bag-Size`` line to ``bag-info.txt`` whose value counts
       the bytes of that line itself.

    :param dsdir: path of the directory to create and convert into a bag
    """
    spec = {
        "totalfiles": 22,
        "totalsize": 1014600,
        "files": [{ "type": "inventory",
                    "sizes": { 1400: 1, 159800: 1, 35000: 2, 0: 1, 1000: 1 }}],
        "dirs": [{
            "files": [{ "type": "inventory", "sizes": { 86000: 3 }}],
            "dirs": [{
                "files": [{ "type": "inventory", "sizes": {110000: 1, 5800: 4}}]
            }]
        }, {
            "files": [{ "type": "inventory", "sizes": { 86000: 3 }}],
            "dirs": [{
                "files": [{ "type": "inventory", "sizes": {110000: 1, 5800: 4}}]
            }]
        }]
    }

    mkr = mkdata.DatasetMaker(dsdir, spec)
    mkr.fill()

    # called for its side effect on dsdir; the returned bag is not needed
    bagit.make_bag(dsdir)

    # create metadata tree
    datadir = os.path.join(dsdir, "data")
    mdatadir = os.path.join(dsdir, "metadata")
    os.mkdir(mdatadir)
    with open(os.path.join(dsdir, "tagmanifest-sha256.txt"), "a") as fd:
        for dirpath, _subdirs, files in os.walk(datadir):
            for fname in files:
                # mirror the data file's path (relative to data/) under
                # metadata/
                relpath = os.path.join(dirpath, fname)[len(datadir)+1:]
                mdfile = os.path.join(mdatadir, relpath)
                parent = os.path.dirname(mdfile)
                if not os.path.exists(parent):
                    os.makedirs(parent)
                mkdata.create_file(mdfile, 16)
                # manifest entries use paths relative to the bag root
                fd.write("{0} {1}\n".format(checksum_of(mdfile),
                                            mdfile[len(dsdir)+1:]))

    # set the Bag-Size
    sz = du(dsdir)
    line = "Bag-Size: {0} B\n".format(sz)
    # The recorded size must include the line being appended.  One pass is
    # enough unless adding the line's length pushes the total to one more
    # digit, which makes the line a character longer; the second pass
    # accounts for that and is a no-op otherwise.
    for _ in range(2):
        line = "Bag-Size: {0} B\n".format(sz + len(line))
    with open(os.path.join(dsdir, "bag-info.txt"), "a") as fd:
        fd.write(line)
Пример #9
0
    def test_fill_with_files_w_iter(self):
        """Check _fill_with_files() when the sizes come from an iterator.

        The iterator is built for 5 files totalling 500 bytes, but the
        call is limited to 4 files, so the expected result is
        ``(400, 4)`` and four 100-byte files under 'goob'.
        """
        mkr = mkdata.DatasetMaker(self.dsdir, {
            'totalfiles': 10,
            'totalsize': 0
        })
        self.assertTrue(not os.path.exists(self.dsdir))

        # named size_iter so the builtin iter() is not shadowed
        size_iter = mkdata.UniformSizeIterator(totalsize=500,
                                               totalfiles=5).iterate()
        n = mkr._fill_with_files("goob", 3000, 4, size_iter)
        self.assertEqual(n, (400, 4))

        parent = os.path.join(self.dsdir, 'goob')
        files = os.listdir(parent)
        sizes = [os.stat(os.path.join(parent, f)).st_size for f in files]
        self.assertTrue(all(s == 100 for s in sizes),
                        "Wrong file sizes: " + str(sizes))
        self.assertEqual(len(files), 4)
Пример #10
0
    def test_create_dir(self):
        """Check that _create_dir() makes directories, optionally nested.

        Called without arguments it must create a directory inside the
        (newly created) dataset root; called with "furry/goob" the
        returned relative path must start with that parent path.
        """
        maker = mkdata.DatasetMaker(self.dsdir, {
            'totalfiles': 10,
            'totalsize': 0
        })
        self.assertFalse(os.path.exists(self.dsdir))

        created = maker._create_dir()
        self.assertTrue(os.path.exists(self.dsdir))
        target = os.path.join(self.dsdir, created)
        self.assertTrue(os.path.exists(target))
        self.assertTrue(os.path.isdir(target))

        created = maker._create_dir("furry/goob")
        self.assertTrue(os.path.exists(self.dsdir))
        # the trailing empty element yields the closing separator
        expected_prefix = os.sep.join(("furry", "goob", ""))
        self.assertTrue(created.startswith(expected_prefix),
                        "directory not created under furry/goob/")
        target = os.path.join(self.dsdir, created)
        self.assertTrue(os.path.exists(target))
        self.assertTrue(os.path.isdir(target))
Пример #11
0
    def test_fill(self):
        """Check fill() end to end on a tiny specification.

        The spec asks for 15 bytes in 3 files: two files (10 bytes in
        total) at the top level and one directory holding a single
        5-byte file.  Directories are recognised by the '_d' suffix of
        their names.
        """
        mkr = mkdata.DatasetMaker(
            self.dsdir, {
                'totalsize': 15,
                'totalfiles': 3,
                'files': [{
                    'totalsize': 10,
                    'totalfiles': 2
                }],
                'dirs': [{
                    'totalsize': 5,
                    'totalfiles': 1
                }]
            })
        self.assertTrue(not os.path.exists(self.dsdir))

        mkr.fill()
        self.assertTrue(os.path.exists(self.dsdir))

        fns = os.listdir(self.dsdir)
        self.assertEqual(
            len(fns), 3,
            "Wrong number of file/dirs: expected 3; got " + str(fns))
        dirs = [f for f in fns if f.endswith('_d')]
        self.assertEqual(len(dirs), 1)
        self.assertTrue(os.path.isdir(os.path.join(self.dsdir, dirs[0])),
                        "Not a directory: " + str(dirs))

        # the two top-level files share the 10 bytes evenly
        sizes = sorted([
            os.stat(os.path.join(self.dsdir, f)).st_size for f in fns
            if not f.endswith('_d')
        ])
        self.assertEqual(sizes, [5, 5])

        # the single subdirectory holds the remaining 5-byte file
        fns = os.listdir(os.path.join(self.dsdir, dirs[0]))
        self.assertEqual(len(fns), 1)
        self.assertEqual(
            os.stat(os.path.join(self.dsdir, dirs[0], fns[0])).st_size, 5)
Пример #12
0
    def test_fill_with_files(self):
        """Check _fill_with_files() against explicit size lists.

        Expectations encoded below:

        * without a size list nothing is written and (0, 0) is returned,
          whether or not limits are given;
        * with sizes [75, 80] and a 103-byte limit only the 75-byte file
          is written;
        * with sizes [75, 80, 200] and limits of 1000 bytes / 3 files all
          three files are written.

        The return value is compared as a (bytes, count) pair.
        """
        maker = mkdata.DatasetMaker(self.dsdir, {
            'totalfiles': 10,
            'totalsize': 0
        })
        self.assertFalse(os.path.exists(self.dsdir))

        # no sizes supplied: no output, and the root is not even created
        for args in (("goob",), ("goob", 103, 2)):
            written = maker._fill_with_files(*args)
            self.assertEqual(written, (0, 0))
            self.assertFalse(os.path.exists(self.dsdir))

        written = maker._fill_with_files("gurn", 103, 4, [75, 80])
        self.assertEqual(written, (75, 1))
        self.assertTrue(os.path.exists(self.dsdir))
        gurn = os.path.join(self.dsdir, 'gurn')
        self.assertTrue(os.path.isdir(gurn))
        names = os.listdir(gurn)
        self.assertEqual(len(names), 1)
        self.assertEqual(os.stat(os.path.join(gurn, names[0])).st_size, 75)

        written = maker._fill_with_files(os.sep.join(("furry", "goob")),
                                         1000, 3, [75, 80, 200])
        self.assertEqual(written, (355, 3))
        self.assertTrue(os.path.exists(self.dsdir))
        nested = os.path.join(self.dsdir, 'furry', 'goob')
        self.assertTrue(os.path.isdir(nested))
        sizes = [
            os.stat(os.path.join(nested, name)).st_size
            for name in os.listdir(nested)
        ]
        sizes.sort()
        self.assertEqual(sizes, [75, 80, 200])
Пример #13
0
    def test_fill_dir_3(self):
        """Check _fill_dir() when uniform and 'inventory' specs are mixed.

        With a budget of 2000 bytes and 22 files, the root is expected
        to hold 15 files (400 bytes in total) and two directories whose
        names end in '_d': one with two 250-byte files and one with five
        220-byte files.
        """
        maker = mkdata.DatasetMaker(self.dsdir, {
            'totalfiles': 10,
            'totalsize': 0
        })
        self.assertFalse(os.path.exists(self.dsdir))

        file_specs = [
            {'totalsize': 100, 'totalfiles': 4},
            {'type': 'inventory', 'sizes': {15: 2, 10: 3}},
            {'totalsize': 50, 'reps': 4, 'totalfiles': 1},
            {'type': 'inventory', 'sizes': {22: 1, 18: 1}},
        ]
        dir_specs = [{'totalsize': 500}, {'totalfiles': 5}]

        maker._fill_dir('', 2000, 22, file_specs, dir_specs)
        self.assertTrue(os.path.exists(self.dsdir))

        entries = os.listdir(self.dsdir)
        self.assertEqual(
            len(entries), 17,
            "Wrong number of file/dirs: expected 17; got " + str(entries))

        subdirs = [name for name in entries if name.endswith('_d')]
        self.assertEqual(len(subdirs), 2)
        really_dirs = [
            os.path.isdir(os.path.join(self.dsdir, d)) for d in subdirs
        ]
        self.assertTrue(
            all(really_dirs),
            "Not all directories are really directories: " + str(subdirs))

        sizes = [
            os.stat(os.path.join(self.dsdir, name)).st_size
            for name in entries if not name.endswith('_d')
        ]
        sizes.sort()
        self.assertEqual(
            sizes,
            [10, 10, 10, 15, 15, 18, 22, 25, 25, 25, 25, 50, 50, 50, 50])
        self.assertEqual(sum(sizes), 400)

        # contents of each subdirectory, as paths relative to the root
        listings = []
        for subdir in (subdirs[0], subdirs[1]):
            found = os.listdir(os.path.join(self.dsdir, subdir))
            listings.append([os.path.join(subdir, name) for name in found])

        # listing order is arbitrary: put the directory whose file names
        # carry the '_220' suffix last
        if listings[0][0].endswith('_220'):
            listings.reverse()
        smaller, larger = listings

        self.assertEqual(len(smaller), 2)
        sizes = sorted(
            os.stat(os.path.join(self.dsdir, p)).st_size for p in smaller)
        self.assertEqual(sizes, [250, 250])

        self.assertEqual(len(larger), 5)
        sizes = sorted(
            os.stat(os.path.join(self.dsdir, p)).st_size for p in larger)
        self.assertEqual(sizes, [220] * 5)
Пример #14
0
    def test_distribute(self):
        """Check how _distribute() completes file and directory specs.

        _distribute() is expected to update the given lists in place:
        every file spec gains 'iter' and 'type' entries (removed here
        before comparing), and missing 'totalsize'/'totalfiles' values
        are filled in.  A second call with a larger budget is expected
        to append one more file spec that carries the surplus.
        """
        mkr = mkdata.DatasetMaker(self.dsdir, {
            'totalfiles': 10,
            'totalsize': 0
        })

        files = [{
            'totalsize': 100,
            'totalfiles': 4
        }, {
            'totalsize': 50,
            'reps': 4,
            'totalfiles': 1
        }, {}]
        dirs = [{'totalsize': 500}, {'totalfiles': 5}]

        mkr._distribute(2000, 19, files, dirs)
        for fs in files:
            self.assertIn('iter', fs)
            # drop the generated entries so the plain numbers can be compared
            del fs['iter']
            del fs['type']
        self.assertEqual(files, [{
            'totalsize': 100,
            'totalfiles': 4
        }, {
            'totalsize': 50,
            'reps': 4,
            'totalfiles': 1
        }, {
            'totalsize': 600,
            'totalfiles': 3
        }])
        self.assertEqual(dirs, [{
            'totalsize': 500,
            'totalfiles': 3
        }, {
            'totalsize': 600,
            'totalfiles': 5
        }])

        # test for adding extra
        mkr._distribute(2200, 22, files, dirs)
        for fs in files:
            self.assertIn('iter', fs)
            del fs['iter']
            del fs['type']
        self.assertEqual(files, [{
            'totalsize': 100,
            'totalfiles': 4
        }, {
            'totalsize': 50,
            'reps': 4,
            'totalfiles': 1
        }, {
            'totalsize': 600,
            'totalfiles': 3
        }, {
            'totalsize': 200,
            'totalfiles': 3
        }])
        self.assertEqual(dirs, [{
            'totalsize': 500,
            'totalfiles': 3
        }, {
            'totalsize': 600,
            'totalfiles': 5
        }])