Пример #1
0
    def _write_copy(
        self,
        abs_file_name: str,
        new_sha: str,
        file_obj: IOIter,
        force_copy: bool,
        dry_run: bool,
    ) -> ManifestEntry:
        """ Build the manifest entry for a full copy of a file, storing the data when needed.

        :param abs_file_name: the name of the file being backed up
        :param new_sha: the sha of the file's current contents
        :param file_obj: the open file; supplies the uid, gid and mode recorded in the entry and
            the data handed to ``self.save``
        :param force_copy: when true, skip the lookup for already-stored data with this sha
        :param dry_run: when true, build the entry but do not save any data
        :returns: the new entry; it is not inserted into the manifest here
        """
        logger.info(f'Saving a new copy of {abs_file_name}')

        # NOTE(review): the trailing test_* comments in this method look like markers that the
        # test suite searches for; keep them attached to the same statements -- TODO confirm
        if force_copy:
            known = None
        else:
            known = self._find_existing_entry_data(new_sha)  # test_f3_file_changed_while_saving

        # Reuse the stored key pair and base information when the sha is already known;
        # otherwise this is a brand-new standalone copy with a fresh key pair and no base
        if known:
            key_pair, base_sha, base_key_pair = known
        else:
            key_pair, base_sha, base_key_pair = generate_key_pair(self.options), None, None

        entry = ManifestEntry(  # test_m2_crash_before_file_save
            abs_file_name, new_sha, base_sha,
            file_obj.uid, file_obj.gid, file_obj.mode,
            key_pair, base_key_pair,
        )

        # Nothing to upload on a dry run, or when the data for this sha was found above
        if dry_run or known:
            return entry

        hmac_signature = self.save(file_obj, entry.sha, key_pair)
        # append the HMAC before writing to db
        entry.key_pair = key_pair + hmac_signature
        return entry
Пример #2
0
def mock_search_results():
    """Return canned search results as a list of ``(file name, [ManifestEntry, ...])`` pairs.

    Four file names are covered: two with several stored versions, one whose first entry has
    every field except the name and the trailing number set to None, and one with a single entry.
    """

    def stored(name, sha, key_pair, stamp):
        # All populated entries share uid/gid 1000, mode 12345 and have no base sha or base key
        # pair; `stamp` is the ninth positional argument (presumably the commit timestamp --
        # TODO confirm against ManifestEntry)
        return ManifestEntry(name, sha, None, 1000, 1000, 12345, key_pair, None, stamp)

    file1_history = [
        stored('/path/1/file1', 'ab1dedef', b'1111', 100),
        stored('/path/1/file1', 'ab2dede1', b'2222', 75),
        stored('/path/1/file1', 'ab3dede2', b'3333', 20),
    ]
    file2_history = [
        stored('/path/2/file2', '1b1dedef', b'4444', 105),
        stored('/path/2/file2', '1b2dede1', b'5555', 65),
    ]
    # NOTE(review): the entries listed under file3 and file4 carry '/path/2/file2' as their own
    # file name; this is kept exactly as found -- confirm whether it is intentional
    file3_history = [
        ManifestEntry('/path/2/file2', None, None, None, None, None, None, None, 176),
        stored('/path/2/file2', 'ffffeeee', b'6666', 105),
    ]
    file4_history = [
        stored('/path/2/file2', '12834567', b'7777', 105),
    ]

    return [
        ('/path/1/file1', file1_history),
        ('/path/2/file2', file2_history),
        ('/path/2/file3', file3_history),
        ('/path/2/file4', file4_history),
    ]
Пример #3
0
def test_delete_entry(mock_manifest):
    """Deleting one specific '/foo' entry must leave exactly one '/foo' row in the manifest."""
    doomed = ManifestEntry('/foo', '12345678', None, 1000, 2000, 34622, b'1234', None, 50)
    mock_manifest.delete_entry(doomed)

    mock_manifest._cursor.execute('''
        select * from manifest where abs_file_name = '/foo'
        ''')
    remaining = mock_manifest._cursor.fetchall()
    assert len(remaining) == 1
Пример #4
0
def preexisting_entry():
    """Return a ManifestEntry for '/some/other/file' that has both a base sha and a base key pair."""
    # positional order: name, sha, base sha, uid, gid, mode, key pair, base key pair
    entry = ManifestEntry(
        '/some/other/file', '12345678', '123123',
        1000, 1000, 55555,
        b'lkjhasdf', b'12341234',
    )
    return entry
Пример #5
0
def current_entry():
    """Return a ManifestEntry for '/foo' with no base sha and no base key pair."""
    # positional order: name, sha, base sha, uid, gid, mode, key pair, base key pair
    entry = ManifestEntry(
        '/foo', 'abcdef123', None,
        1000, 1000, 12345,
        b'aaaaa2222', None,
    )
    return entry
Пример #6
0
def mock_manifest_entry_list():
    """Return a one-element list holding a ManifestEntry for '/path/0/foo/bar'."""
    # positional order: name, sha, base sha, uid, gid, mode, key pair, base key pair
    only_entry = ManifestEntry(
        '/path/0/foo/bar', 'abcd1234', None,
        1000, 1000, 35677,
        b'1111', None,
    )
    return [only_entry]
Пример #7
0
def test_insert_diff_key_pair_for_sha(mock_manifest, mock_stat):
    """Insert a '/somebody_new' entry with sha '12345678' and key pair b'2222'.

    Afterwards the manifest must hold two rows with that sha, and every one of them must
    report b'2222' as its key pair.
    """
    owner, group, perms = mock_stat.st_uid, mock_stat.st_gid, mock_stat.st_mode
    inserted = ManifestEntry('/somebody_new', '12345678', None, owner, group, perms, b'2222', None)
    mock_manifest.insert_or_update(inserted)

    mock_manifest._cursor.execute('''
        select * from manifest where sha = '12345678'
        ''')
    matching = mock_manifest._cursor.fetchall()
    assert len(matching) == 2
    for row in matching:
        assert row['key_pair'] == b'2222'
Пример #8
0
def test_insert_duplicate(mock_manifest, mock_stat):
    """Insert an entry for '/foo' with sha '12345678' and key pair b'1111'.

    The manifest must then hold two '/foo' rows (ordered by commit_timestamp): the first with
    sha '12345679', the last with the inserted values and commit_timestamp 1000.
    """
    same_file = '/foo'
    owner, group, perms = mock_stat.st_uid, mock_stat.st_gid, mock_stat.st_mode
    duplicate = ManifestEntry(same_file, '12345678', None, owner, group, perms, b'1111', None)
    mock_manifest.insert_or_update(duplicate)

    mock_manifest._cursor.execute('''
        select * from manifest
        where abs_file_name = '/foo'
        order by commit_timestamp
        ''')
    rows = mock_manifest._cursor.fetchall()
    assert len(rows) == 2

    oldest, newest = rows[0], rows[-1]
    assert newest['abs_file_name'] == same_file
    assert oldest['sha'] == '12345679'
    assert newest['sha'] == '12345678'
    assert newest['uid'] == 1000
    assert newest['gid'] == 2000
    assert newest['mode'] == 34622
    assert newest['key_pair'] == b'1111'
    assert newest['commit_timestamp'] == 1000
Пример #9
0
def test_update(mock_manifest, mock_stat, base_sha, base_key_pair):
    """Insert a new version of '/foo' (sha 'b33f2') with the given base sha and base key pair.

    The manifest, joined with base_shas, must then hold three '/foo' rows, and the last one
    (ordered by commit_timestamp) must carry the inserted values.
    """
    new_file = '/foo'
    owner, group, perms = mock_stat.st_uid, mock_stat.st_gid, mock_stat.st_mode
    updated = ManifestEntry(
        new_file, 'b33f2', base_sha, owner, group, perms, b'1111', base_key_pair,
    )
    mock_manifest.insert_or_update(updated)

    mock_manifest._cursor.execute('''
        select * from manifest left natural join base_shas
        where abs_file_name = '/foo'
        order by commit_timestamp
        ''')
    rows = mock_manifest._cursor.fetchall()
    assert len(rows) == 3

    # column -> expected value for the most recent row, checked in this order
    expected_latest = {
        'abs_file_name': new_file,
        'sha': 'b33f2',
        'base_sha': base_sha,
        'uid': 1000,
        'gid': 2000,
        'mode': 34622,
        'key_pair': b'1111',
        'base_key_pair': base_key_pair,
        'commit_timestamp': 1000,
    }
    newest = rows[-1]
    for column, wanted in expected_latest.items():
        assert newest[column] == wanted
Пример #10
0
    def _write_diff(
        self,
        abs_file_name: str,
        new_sha: str,
        curr_entry: ManifestEntry,
        file_obj: IOIter,
        dry_run: bool,
    ) -> ManifestEntry:
        """ Build the manifest entry for a changed file, storing a diff against its base version.

        If the diff computation raises DiffTooLargeException, a full copy is written through
        ``_write_copy`` instead and that entry is returned.

        :param abs_file_name: the name of the file being backed up
        :param new_sha: the sha of the file's current contents
        :param curr_entry: the most recent manifest entry for this file
        :param file_obj: the open file holding the new contents
        :param dry_run: when true, compute the diff but do not save it
        :returns: the new entry; it is not inserted into the manifest here
        """
        logger.info(f'Saving a diff for {abs_file_name}')

        known = self._find_existing_entry_data(new_sha)
        if known:
            # data for this sha is already stored; reuse its key pair and base information
            key_pair, base_sha, base_key_pair = known
        else:
            # If the current entry is itself a diff, diff against its base; otherwise, the
            # current entry becomes the base
            current_is_diff = bool(curr_entry.base_sha)
            key_pair = generate_key_pair(self.options)
            if current_is_diff:
                base_sha, base_key_pair = curr_entry.base_sha, curr_entry.base_key_pair
            else:
                base_sha, base_key_pair = curr_entry.sha, curr_entry.key_pair

        entry = ManifestEntry(
            abs_file_name, new_sha, base_sha,
            file_obj.uid, file_obj.gid, file_obj.mode,
            key_pair, base_key_pair,
        )

        if known:
            return entry

        assert base_sha
        # compute a diff between the version we've previously backed up and the new version
        with IOIter() as base_file, IOIter() as patch_file:
            base_file = self.load(base_sha, base_file, base_key_pair)
            threshold = self.options['discard_diff_percentage']
            try:
                fd_diff = compute_diff(base_file, file_obj, patch_file, threshold)
            except DiffTooLargeException:
                logger.info('The computed diff was too large; saving a copy instead.')
                logger.info(
                    '(you can configure this threshold with the discard_diff_percentage option)'
                )
                # rewind the new file before handing it to _write_copy
                file_obj.fd.seek(0)
                return self._write_copy(abs_file_name, new_sha, file_obj, False, dry_run)

            entry.sha = new_sha
            if not dry_run:
                hmac_signature = self.save(fd_diff, entry.sha, key_pair)
                entry.key_pair = key_pair + hmac_signature
        return entry
Пример #11
0
    def save_if_new(
        self,
        abs_file_name: str,
        *,
        dry_run: bool = False,
        force_copy: bool = False,
    ) -> Optional[ManifestEntry]:
        """ Back up a file if its contents or its metadata differ from the latest manifest entry.

        :param abs_file_name: the name of the file under consideration
        :param dry_run: when true, neither save any data nor touch the manifest
        :param force_copy: make a new copy of the file even if we could compute a diff instead
        :returns: the entry describing what was (or, on a dry run, would be) saved, or None if
            the file is up to date
        """
        curr_entry = self.manifest.get_entry(abs_file_name)
        result = None
        with IOIter(abs_file_name) as new_file:
            new_sha = compute_sha(new_file)

            if force_copy or not curr_entry or not curr_entry.sha:
                # Never backed up before, previously deleted, or a copy was explicitly requested:
                # save a new copy; we make a copy here to ensure that the contents don't change
                # while backing the file up, and that we have the correct sha
                result = self._write_copy(abs_file_name, new_sha, new_file, force_copy, dry_run)
            elif new_sha != curr_entry.sha:
                # The contents changed; files matching skip_diff_patterns are always stored as
                # full copies, everything else is stored as a diff
                skip_diff = regex_search_list(abs_file_name, self.options['skip_diff_patterns'])
                if skip_diff:
                    result = self._write_copy(abs_file_name, new_sha, new_file, False, dry_run)
                else:
                    result = self._write_diff(
                        abs_file_name, new_sha, curr_entry, new_file, dry_run,
                    )
            else:
                same_metadata = (
                    new_file.uid == curr_entry.uid
                    and new_file.gid == curr_entry.gid
                    and new_file.mode == curr_entry.mode
                )
                if same_metadata:
                    # we don't want to flood the log with all the files that haven't changed
                    logger.debug(f'{abs_file_name} is up to date!')
                else:
                    # Same sha but different ownership or mode: record only the new metadata
                    logger.info(f'Saving changed metadata for {abs_file_name}')
                    result = ManifestEntry(
                        abs_file_name,
                        curr_entry.sha,
                        curr_entry.base_sha,
                        new_file.uid,
                        new_file.gid,
                        new_file.mode,
                        # NOTE: reusing the key pair is safe because the data has not changed!
                        curr_entry.key_pair,
                        curr_entry.base_key_pair,
                    )

            if result and not dry_run:
                self.manifest.insert_or_update(result)
            # NOTE(review): the trailing comment below looks like a marker that the test suite
            # searches for; keep it on the return statement -- TODO confirm
            return result  # test_m2_crash_after_file_save