Example #1
    def test_journal_smoke(self):
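        # Run a trivial workload on the mounted client so the journal has some content.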
        workunit(
            self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id):
                    ["fs/misc/trivial_sync.sh"],
                },
                "timeout": "1h"
            })

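        # Unmount all clients and take the MDS offline so that the journal tool can safely operate on the journal.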
        for mount in self.mounts:
            mount.umount_wait()

        self.fs.mds_stop()
        self.fs.mds_fail()

        # journal tool smoke
        workunit(
            self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id):
                    ["suites/cephfs_journal_tool_smoke.sh"],
                },
                "timeout": "1h"
            })

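        # Bring the MDS back online and remount the client.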
        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()

        # trivial sync on mount a
        workunit(
            self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id):
                    ["fs/misc/trivial_sync.sh"],
                },
                "timeout": "1h"
            })
Example #2
    def test_journal_smoke(self):
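        # Run a trivial workload on the mounted client so the journal has some content.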
        workunit(self.ctx, {
            'clients': {
                "client.{0}".format(self.mount_a.client_id): [
                    "fs/misc/trivial_sync.sh"],
            },
            "timeout": "1h"
        })

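        # Unmount all clients and take the MDS offline so that the journal tool can safely operate on the journal.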
        for mount in self.mounts:
            mount.umount_wait()

        self.fs.mds_stop()
        self.fs.mds_fail()

        # journal tool smoke
        workunit(self.ctx, {
            'clients': {
                "client.{0}".format(self.mount_a.client_id): [
                    "suites/cephfs_journal_tool_smoke.sh"],
            },
            "timeout": "1h"
        })

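        # Bring the MDS back online and remount the client.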
        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        self.mount_a.mount()

        # trivial sync on mount a
        workunit(self.ctx, {
            'clients': {
                "client.{0}".format(self.mount_a.client_id): [
                    "fs/misc/trivial_sync.sh"],
            },
            "timeout": "1h"
        })
Example #3
    def test_journal_migration(self):
        old_journal_version = JOURNAL_FORMAT_LEGACY
        new_journal_version = JOURNAL_FORMAT_RESILIENT

        self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)

        # Create a filesystem using the older journal format.
        self.mount_a.umount_wait()
        self.fs.mds_stop()
        self.fs.reset()
        self.fs.mds_restart()
        self.fs.wait_for_daemons()

        # Do some client work so that the log is populated with something.
        with self.mount_a.mounted():
            self.mount_a.create_files()
            self.mount_a.check_files()  # sanity, this should always pass

            # Run a more substantial workunit so that the length of the log to be
            # converted will span at least a few segments
            workunit(self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
                },
                "timeout": "3h"
            })

        # Modify the ceph.conf to ask the MDS to use the new journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)

        # Restart the MDS.
        self.fs.mds_fail_restart()
        self.fs.wait_for_daemons()

        # This ensures that all daemons come up into a valid state
        self.fs.wait_for_daemons()

        # Check that files created in the initial client workload are still visible
        # in a client mount.
        with self.mount_a.mounted():
            self.mount_a.check_files()

        # Verify that the journal really has been rewritten.
        journal_version = self.fs.get_journal_version()
        if journal_version != new_journal_version:
            raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
                new_journal_version, journal_version
            ))

        # Verify that cephfs-journal-tool can now read the rewritten journal
        inspect_out = self.fs.journal_tool(["journal", "inspect"])
        if not inspect_out.endswith(": OK"):
            raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
                inspect_out
            ))

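        # Dump the journal events to JSON and count them: the fsstress run should have left plenty of entries.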
        self.fs.journal_tool(["event", "get", "json", "--path", "/tmp/journal.json"])
        p = self.fs.tool_remote.run(
            args=[
                "python",
                "-c",
                "import json; print len(json.load(open('/tmp/journal.json')))"
            ],
            stdout=StringIO())
        event_count = int(p.stdout.getvalue().strip())
        if event_count < 1000:
            # Approximate value of "lots", expected from having run fsstress
            raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))

        # Do some client work to check that writing the log is still working
        with self.mount_a.mounted():
            workunit(self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
                },
                "timeout": "3h"
            })
Example #4
    def test_journal_migration(self):
        old_journal_version = JOURNAL_FORMAT_LEGACY
        new_journal_version = JOURNAL_FORMAT_RESILIENT

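        # Unmount the client and stop the MDS before recreating the filesystem.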
        self.mount_a.umount_wait()
        self.fs.mds_stop()

        # Create a filesystem using the older journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)
        self.fs.mds_restart()
        self.fs.recreate()

        # Enable standby replay, to cover the bug case #8811 where
        # a standby replay might mistakenly end up trying to rewrite
        # the journal at the same time as an active daemon.
        self.fs.set_allow_standby_replay(True)

        status = self.fs.wait_for_daemons()

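        # A standby-replay daemon should now be following the active rank.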
        self.assertTrue(self.fs.get_replay(status=status) is not None)

        # Do some client work so that the log is populated with something.
        with self.mount_a.mounted():
            self.mount_a.create_files()
            self.mount_a.check_files()  # sanity, this should always pass

            # Run a more substantial workunit so that the length of the log to be
            # converted will span at least a few segments
            workunit(self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
                },
                "timeout": "3h"
            })

        # Modify the ceph.conf to ask the MDS to use the new journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)

        # Restart the MDS.
        self.fs.mds_fail_restart()

        # This ensures that all daemons come up into a valid state
        status = self.fs.wait_for_daemons()

        # Check that files created in the initial client workload are still visible
        # in a client mount.
        with self.mount_a.mounted():
            self.mount_a.check_files()

        # Verify that the journal really has been rewritten.
        journal_version = self.fs.get_journal_version()
        if journal_version != new_journal_version:
            raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
                new_journal_version, journal_version
            ))

        # Verify that cephfs-journal-tool can now read the rewritten journal
        inspect_out = self.fs.journal_tool(["journal", "inspect"], 0)
        if not inspect_out.endswith(": OK"):
            raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
                inspect_out
            ))

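        # Dump the journal events to JSON and count them: the fsstress run should have left plenty of entries.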
        self.fs.journal_tool(["event", "get", "json",
                              "--path", "/tmp/journal.json"], 0)
        p = self.fs.tool_remote.run(
            args=[
                "python",
                "-c",
                "import json; print len(json.load(open('/tmp/journal.json')))"
            ],
            stdout=StringIO())
        event_count = int(p.stdout.getvalue().strip())
        if event_count < 1000:
            # Approximate value of "lots", expected from having run fsstress
            raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))

        # Do some client work to check that writing the log is still working
        with self.mount_a.mounted():
            workunit(self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
                },
                "timeout": "3h"
            })

        # Check that both an active and a standby replay are still up
        status = self.fs.status()
        self.assertEqual(len(list(self.fs.get_replays(status=status))), 1)
        self.assertEqual(len(list(self.fs.get_ranks(status=status))), 1)
Example #5
    def test_journal_migration(self):
        old_journal_version = JOURNAL_FORMAT_LEGACY
        new_journal_version = JOURNAL_FORMAT_RESILIENT

        # Pick out two daemons to use
        mds_a, mds_b = sorted(self.mds_cluster.mds_ids[0:2]) 

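        # Unmount the client and stop the MDS before reconfiguring and recreating the filesystem.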
        self.mount_a.umount_wait()
        self.fs.mds_stop()

        # Enable standby replay, to cover the bug case #8811 where
        # a standby replay might mistakenly end up trying to rewrite
        # the journal at the same time as an active daemon.
        self.fs.set_ceph_conf('mds', 'mds standby replay', "true")
        self.fs.set_ceph_conf('mds', 'mds standby for rank', "0")

        # Create a filesystem using the older journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)
        self.fs.recreate()
        self.fs.mds_restart(mds_id=mds_a)
        self.fs.wait_for_daemons()
        self.assertEqual(self.fs.get_active_names(), [mds_a])

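        # Helper: names of the MDS daemons currently in standby replay for this filesystem.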
        def replay_names():
            return [s['name']
                    for s in self.fs.status().get_replays(fscid = self.fs.id)]

        # Start the standby and wait for it to come up
        self.fs.mds_restart(mds_id=mds_b)
        self.wait_until_equal(
                replay_names,
                [mds_b],
                timeout = 30)

        # Do some client work so that the log is populated with something.
        with self.mount_a.mounted():
            self.mount_a.create_files()
            self.mount_a.check_files()  # sanity, this should always pass

            # Run a more substantial workunit so that the length of the log to be
            # converted will span at least a few segments
            workunit(self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"],
                },
                "timeout": "3h"
            })

        # Modify the ceph.conf to ask the MDS to use the new journal format.
        self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)

        # Restart the MDS.
        self.fs.mds_fail_restart(mds_id=mds_a)
        self.fs.mds_fail_restart(mds_id=mds_b)

        # This ensures that all daemons come up into a valid state
        self.fs.wait_for_daemons()

        # Check that files created in the initial client workload are still visible
        # in a client mount.
        with self.mount_a.mounted():
            self.mount_a.check_files()

        # Verify that the journal really has been rewritten.
        journal_version = self.fs.get_journal_version()
        if journal_version != new_journal_version:
            raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
                new_journal_version, journal_version
            ))

        # Verify that cephfs-journal-tool can now read the rewritten journal
        inspect_out = self.fs.journal_tool(["journal", "inspect"], 0)
        if not inspect_out.endswith(": OK"):
            raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
                inspect_out
            ))

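        # Dump the journal events to JSON and count them: the fsstress run should have left plenty of entries.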
        self.fs.journal_tool(["event", "get", "json",
                              "--path", "/tmp/journal.json"], 0)
        p = self.fs.tool_remote.run(
            args=[
                "python",
                "-c",
                "import json; print len(json.load(open('/tmp/journal.json')))"
            ],
            stdout=StringIO())
        event_count = int(p.stdout.getvalue().strip())
        if event_count < 1000:
            # Approximate value of "lots", expected from having run fsstress
            raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))

        # Do some client work to check that writing the log is still working
        with self.mount_a.mounted():
            workunit(self.ctx, {
                'clients': {
                    "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
                },
                "timeout": "3h"
            })

        # Check that both an active and a standby replay are still up
        self.assertEqual(len(replay_names()), 1)
        self.assertEqual(len(self.fs.get_active_names()), 1)
        self.assertTrue(self.mds_cluster.mds_daemons[mds_a].running())
        self.assertTrue(self.mds_cluster.mds_daemons[mds_b].running())
def task(ctx, config):
    """
    Given a Ceph cluster has already been set up, exercise the migration
    of the CephFS journal from an older format to the latest format.  On
    successful completion the filesystem will be running with a journal
    in the new format.

    Optionally specify which client to use like this:

    - mds-journal_migration:
        client: client.0

    """
    if not hasattr(ctx, 'ceph'):
        raise RuntimeError("This task must be nested in 'ceph' task")

    if not hasattr(ctx, 'mounts'):
        raise RuntimeError("This task must be nested inside 'kclient' or 'ceph_fuse' task")

    # Determine which client we will use
    if config and 'client' in config:
        # Use client specified in config
        client_role = config['client']
        client_list = list(misc.get_clients(ctx, [client_role]))
        try:
            client_id = client_list[0][0]
        except IndexError:
            raise RuntimeError("Client role '{0}' not found".format(client_role))
    else:
        # Pick one arbitrary client to use
        client_list = list(misc.all_roles_of_type(ctx.cluster, 'client'))
        try:
            client_id = client_list[0]
        except IndexError:
            raise RuntimeError("This task requires at least one client")

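    # Construct the Filesystem helper and expose it on the context for child tasks.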
    fs = Filesystem(ctx)
    ctx.fs = fs
    old_journal_version = JOURNAL_FORMAT_LEGACY
    new_journal_version = JOURNAL_FORMAT_RESILIENT

    fs.set_ceph_conf('mds', 'mds journal format', old_journal_version)

    # Create a filesystem using the older journal format.
    for mount in ctx.mounts.values():
        mount.umount_wait()
    fs.mds_stop()
    fs.reset()
    fs.mds_restart()

    # Do some client work so that the log is populated with something.
    mount = ctx.mounts[client_id]
    with mount.mounted():
        mount.create_files()
        mount.check_files()  # sanity, this should always pass

        # Run a more substantial workunit so that the length of the log to be
        # converted will span at least a few segments
        workunit(ctx, {
            'clients': {
                "client.{0}".format(client_id): ["suites/fsstress.sh"],
            },
            "timeout": "3h"
        })

    # Modify the ceph.conf to ask the MDS to use the new journal format.
    fs.set_ceph_conf('mds', 'mds journal format', new_journal_version)

    # Restart the MDS.
    fs.mds_fail_restart()
    fs.wait_for_daemons()

    # This ensures that all daemons come up into a valid state
    fs.wait_for_daemons()

    # Check that files created in the initial client workload are still visible
    # in a client mount.
    with mount.mounted():
        mount.check_files()

    # Verify that the journal really has been rewritten.
    journal_version = fs.get_journal_version()
    if journal_version != new_journal_version:
        raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
            new_journal_version, journal_version
        ))

    # Verify that cephfs-journal-tool can now read the rewritten journal
    proc = mount.client_remote.run(
        args=["cephfs-journal-tool", "journal", "inspect"],
        stdout=StringIO())
    if not proc.stdout.getvalue().strip().endswith(": OK"):
        raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
            proc.stdout.getvalue()
        ))

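    # Dump the journal events to JSON and count them: the fsstress run should have left plenty of entries.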
    mount.client_remote.run(
        args=["sudo", "cephfs-journal-tool", "event", "get", "json", "--path", "/tmp/journal.json"])
    proc = mount.client_remote.run(
        args=[
            "python",
            "-c",
            "import json; print len(json.load(open('/tmp/journal.json')))"
        ],
        stdout=StringIO())
    event_count = int(proc.stdout.getvalue().strip())
    if event_count < 1000:
        # Approximate value of "lots", expected from having run fsstress
        raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))

    # Leave all MDSs and clients running for any child tasks
    for mount in ctx.mounts.values():
        mount.mount()
        mount.wait_until_mounted()

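    # Yield so that any nested tasks can run against the migrated filesystem.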
    yield