Example #1
    def test_find_duplicates_jobs(self):
        """
        Test finding duplicates using correct job count from parameter.
        """
        max_cpu = 666
        patch_cpu = patch('bear.output.cpu_count', return_value=max_cpu)
        patch_pool = patch('bear.output.Pool')
        with patch_cpu, patch_pool as pool:
            self.assertEqual(find_duplicates([], processes=1), {})
            self.assertEqual(find_duplicates([], processes=0), {})
            self.assertEqual(find_duplicates([], processes=4), {})

            self.assertEqual(
                [
                    # remove __iter__() calls because those return a tuple
                    # iterator instead of call().__enter__().map().__iter__()
                    item
                    for item in pool.mock_calls if '__iter__' not in str(item)
                ],
                [
                    call(processes=1),
                    call().__enter__(),
                    call().__enter__().map(hash_files, []),
                    call().__exit__(None, None, None),
                    call(processes=666),
                    call().__enter__(),
                    call().__enter__().map(hash_files, []),
                    call().__exit__(None, None, None),
                    call(processes=4),
                    call().__enter__(),
                    call().__enter__().map(hash_files, []),
                    call().__exit__(None, None, None),
                ])
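
The jobs=0 run above falls back to the patched cpu_count(), which is why call(processes=666) shows up in the expected mock calls. A minimal sketch of that selection logic, with pick_job_count as a hypothetical name rather than bear's actual API:

from multiprocessing import cpu_count

def pick_job_count(processes):
    # assumed behavior implied by the assertions: a falsy job count
    # falls back to the machine's CPU count (the patched 666 above)
    return processes if processes else cpu_count()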
Example #2
    def test_find_duplicates_chunks(self):
        """
        Test chunking list of files for multiple processes.
        """
        patch_pool = patch('bear.output.Pool')

        files = [str(num) for num in range(15)]

        def side_effect(folder):
            return {
                'a': files[:5],
                'b': files[5:10],
                'c': files[10:]
            }[basename(folder)]

        patch_find_files = patch('bear.output.find_files',
                                 side_effect=side_effect)

        # pylint: disable=confusing-with-statement
        with patch_pool as pool, patch_find_files:
            self.assertEqual(find_duplicates(['a', 'b', 'c'], processes=1), {})
            self.assertEqual(find_duplicates(['a', 'b', 'c'], processes=3), {})
            self.assertEqual(find_duplicates(['a', 'b', 'c'], processes=4), {})

            self.assertEqual(
                [
                    # remove __iter__() calls because those return a tuple
                    # iterator instead of call().__enter__().map().__iter__()
                    item
                    for item in pool.mock_calls if '__iter__' not in str(item)
                ],
                [
                    call(processes=1),
                    call().__enter__(),
                    # all files to single process
                    call().__enter__().map(hash_files, [files]),
                    call().__exit__(None, None, None),
                    call(processes=3),
                    call().__enter__(),
                    # files split into slices of len // processes files
                    # -> 15 // 3 = 5 each
                    call().__enter__().map(
                        hash_files, [files[0:5], files[5:10], files[10:15]]),
                    call().__exit__(None, None, None),
                    call(processes=4),
                    call().__enter__(),
                    # files split into slices of len // processes files
                    # -> 15 // 4 = 3 each, plus a remainder slice
                    call().__enter__().map(hash_files, [
                        files[0:3], files[3:6], files[6:9], files[9:12],
                        files[12:]
                    ]),
                    call().__exit__(None, None, None),
                ])
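
The expected map() arguments pin down the chunking rule: the slice size is len(files) // processes, and any remainder spills over into extra trailing slices (15 files across 4 processes gives size 3 and therefore five slices). A sketch under that assumption, with chunk_files as a hypothetical helper name:

def chunk_files(files, processes):
    # slice size is len(files) // processes, clamped to at least 1;
    # 15 files / 4 processes -> size 3 -> five slices, as asserted above
    size = max(1, len(files) // processes)
    return [files[i:i + size] for i in range(0, len(files), size)]

For an empty file list this returns [], matching the map(hash_files, []) calls in Example #1.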
Example #3
    def test_find_duplicates_join(self):
        """
        Test joining duplicates from multiple jobs.
        """
        mock_pool = MagicMock(
            **{
                '__enter__.return_value.map.return_value': [{
                    '123': ['original'],
                    '456': ['ori', 'dupli']
                }, {
                    '123': ['duplicate']
                }, {
                    '789': ['original'],
                    '012': ['orig', 'dup']
                }]
            })
        patch_pool = patch('bear.output.Pool', return_value=mock_pool)
        with patch_pool:
            # 789 is single-file original, ignored in filter_files()
            self.assertEqual(
                find_duplicates([], processes=2), {
                    '012': ['orig', 'dup'],
                    '123': ['original', 'duplicate'],
                    '456': ['ori', 'dupli']
                })
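
The expected result implies two steps after map() returns: per-worker dicts are merged by hash, then single-file entries such as '789' are filtered out. A sketch of that join, with join_results standing in for whatever bear does internally:

def join_results(results):
    # merge per-hash file lists across the worker results ...
    merged = {}
    for fhash, files in ((k, v) for result in results for k, v in result.items()):
        merged.setdefault(fhash, []).extend(files)
    # ... then drop single-file hashes, i.e. the filter_files()
    # behavior the comment above refers to
    return {fhash: files for fhash, files in merged.items() if len(files) > 1}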
Example #4
def handle_duplicates(ctx: Context, hasher: Hasher):
    """
    Handle --duplicate related behavior.
    """

    duplicates = find_duplicates(ctx=ctx, hasher=hasher)
    output_duplicates(hashes=duplicates, out=ctx.output)
    if ctx.keep_oldest:
        remove_except_oldest(files=duplicates.values())
    elif ctx.keep_newest:
        remove_except_newest(files=duplicates.values())
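
remove_except_oldest() and remove_except_newest() are not shown in this listing. A plausible sketch, assuming each duplicate group is ordered by modification time and every file except the kept one is deleted:

import os

def remove_except_oldest(files):
    # hypothetical implementation: keep the file with the smallest
    # mtime in each duplicate group, delete the rest
    for group in files:
        ordered = sorted(group, key=os.path.getmtime)
        for path in ordered[1:]:
            os.remove(path)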
Example #5
    def test_find_duplicates_join(self):
        """
        Test joining duplicates from multiple jobs.
        """
        mock_pool = MagicMock(**{
            '__enter__.return_value.map.return_value': [
                {'123': ['original'], '456': ['ori', 'dupli']},
                {'123': ['duplicate']},
                {'789': ['original'], '012': ['orig', 'dup']}
            ]
        })
        patch_pool = patch('bear.output.Pool', return_value=mock_pool)
        ctx = Context(Namespace(
            duplicates=[], jobs=2, files=[], traverse=[], hash=[]
        ))
        with patch_pool:
            # 789 is single-file original, ignored in filter_files()
            self.assertEqual(find_duplicates(
                ctx=ctx, hasher=Hasher.MD5
            ), {
                '012': ['orig', 'dup'],
                '123': ['original', 'duplicate'],
                '456': ['ori', 'dupli']
            })
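
Compared to Example #3, find_duplicates() now takes a Context built from an argparse Namespace plus a Hasher instead of a bare processes= count. A minimal Context stand-in that would satisfy the attribute access in these tests; the real class lives in bear and presumably also carries the output/keep_oldest/keep_newest fields used in Example #4:

from argparse import Namespace

class Context:
    # minimal stand-in: expose the parsed CLI arguments as attributes
    def __init__(self, args: Namespace):
        self.duplicates = args.duplicates
        self.jobs = args.jobs
        self.files = args.files
        self.traverse = args.traverse
        self.hash = args.hash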
Example #6
    def test_find_duplicates_chunks(self):
        """
        Test chunking list of files for multiple processes.
        """
        patch_pool = patch('bear.output.Pool')

        files = [str(num) for num in range(15)]

        # pylint: disable=dangerous-default-value
        def side_effect(ctx, folder):
            assert isinstance(ctx, Context)
            return {
                'a': files[:5],
                'b': files[5:10],
                'c': files[10:]
            }[basename(folder)]

        patch_find_files = patch(
            'bear.output.find_files', side_effect=side_effect
        )

        # because partial() != partial() in mock calls!
        # partially fun, partially headache -_-'
        fun_part = patch('bear.output.partial')

        # pylint: disable=confusing-with-statement
        with patch_pool as pool, patch_find_files, fun_part as partial_fun:
            self.assertEqual(find_duplicates(
                ctx=Context(Namespace(
                    duplicates=['a', 'b', 'c'], jobs=1,
                    files=[], traverse=[], hash=[]
                )),
                hasher=Hasher.MD5
            ), {})
            self.assertEqual(find_duplicates(
                ctx=Context(Namespace(
                    duplicates=['a', 'b', 'c'], jobs=3,
                    files=[], traverse=[], hash=[]
                )),
                hasher=Hasher.MD5
            ), {})
            self.assertEqual(find_duplicates(
                ctx=Context(Namespace(
                    duplicates=['a', 'b', 'c'], jobs=4,
                    files=[], traverse=[], hash=[]
                )),
                hasher=Hasher.MD5
            ), {})

            self.assertEqual([
                # remove __iter__() calls because those return a tuple
                # iterator instead of call().__enter__().map().__iter__()
                item for item in pool.mock_calls if '__iter__' not in str(item)
            ], [
                call(processes=1),
                call().__enter__(),
                # all files to single process
                call().__enter__().map(partial_fun(
                    hash_files, hasher=Hasher.MD5
                ), [files]),
                call().__exit__(None, None, None),

                call(processes=3),
                call().__enter__(),
                # files split into slices of len // processes files
                # -> 15 // 3 = 5 each
                call().__enter__().map(partial_fun(
                    hash_files, hasher=Hasher.MD5
                ), [
                    files[0:5], files[5:10], files[10:15]
                ]),
                call().__exit__(None, None, None),

                call(processes=4),
                call().__enter__(),
                # files split into slices of len // processes files
                # -> 15 // 4 = 3 each, plus a remainder slice
                call().__enter__().map(partial_fun(
                    hash_files, hasher=Hasher.MD5
                ), [
                    files[0:3], files[3:6], files[6:9], files[9:12], files[12:]
                ]),
                call().__exit__(None, None, None),
            ])
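
The partial() patch exists because functools.partial does not define __eq__: two partials built from the same function and arguments compare by identity, so an expected call assembled with a fresh partial(hash_files, hasher=Hasher.MD5) would never match the one recorded inside find_duplicates. A quick illustration:

from functools import partial

def add(x, y):
    return x + y

# identical arguments, still unequal: partial instances fall back to
# identity comparison, hence the test patches bear.output.partial and
# reuses the returned mock (partial_fun) in the expected calls
assert partial(add, 1) != partial(add, 1)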
Example #7
    def test_find_duplicates_jobs(self):
        """
        Test finding duplicates using correct job count from parameter.
        """
        max_cpu = 666
        patch_cpu = patch('bear.output.cpu_count', return_value=max_cpu)
        patch_pool = patch('bear.output.Pool')

        # because partial() != partial() in mock calls!
        # partially fun, partially headache -_-'
        fun_part = patch('bear.output.partial')

        # pylint: disable=confusing-with-statement
        with patch_cpu, patch_pool as pool, fun_part as partial_fun:
            self.assertEqual(find_duplicates(
                ctx=Context(Namespace(
                    duplicates=[], jobs=1,
                    files=[], traverse=[], hash=[]
                )),
                hasher=Hasher.MD5
            ), {})
            self.assertEqual(find_duplicates(
                ctx=Context(Namespace(
                    duplicates=[], jobs=0,
                    files=[], traverse=[], hash=[]
                )),
                hasher=Hasher.MD5
            ), {})
            self.assertEqual(find_duplicates(
                ctx=Context(Namespace(
                    duplicates=[], jobs=4,
                    files=[], traverse=[], hash=[]
                )),
                hasher=Hasher.MD5
            ), {})

            self.assertEqual([
                # remove __iter__() calls because those return a tuple
                # iterator instead of call().__enter__().map().__iter__()
                item for item in pool.mock_calls if '__iter__' not in str(item)
            ], [
                call(processes=1),
                call().__enter__(),
                call().__enter__().map(partial_fun(
                    hash_files, hasher=Hasher.MD5
                ), []),
                call().__exit__(None, None, None),

                call(processes=666),
                call().__enter__(),
                call().__enter__().map(partial_fun(
                    hash_files, hasher=Hasher.MD5
                ), []),
                call().__exit__(None, None, None),

                call(processes=4),
                call().__enter__(),
                call().__enter__().map(partial_fun(
                    hash_files, hasher=Hasher.MD5
                ), []),
                call().__exit__(None, None, None),
            ])
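
As in Example #1, the jobs=0 run falls back to the patched cpu_count(), which is why call(processes=666) appears between the jobs=1 and jobs=4 groups of expected calls.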