Exemplo n.º 1
0
def configure_logging():
    """Send the agent's console and daemon logs to stderr.

    Each logger gets its own stderr StreamHandler; console_log is set to INFO
    and daemon_log to WARNING.
    """
    import logging
    from chroma_agent.log import console_log, daemon_log

    # (logger, threshold) pairs, configured in this order.
    targets = (
        (console_log, logging.INFO),
        (daemon_log, logging.WARNING),
    )
    for logger, threshold in targets:
        logger.addHandler(logging.StreamHandler(sys.stderr))
        logger.setLevel(threshold)
Exemplo n.º 2
0
def main():
    """Command-line entry point for the simulated benchmarks.

    Parses the command line, starts the built-in cluster simulator (or, when
    --remote_simulator is given, connects to a separately-run one over
    XML-RPC), runs the selected benchmark sub-command and then terminates the
    process with a hard exit.

    This function never returns: the process exits with status 0 when the
    benchmark completes and with a non-zero status when it raises.
    """
    parser = argparse.ArgumentParser(description="Simulated benchmarks")
    # NOTE(review): --remote_simulator and --debug use argparse's default
    # 'store' action, so they require a value and any non-empty value
    # (including "False") enables them. Left as-is to keep the CLI stable.
    parser.add_argument('--remote_simulator',
                        required=False,
                        help="Disable built-in simulator (run it separately)",
                        default=False)
    parser.add_argument('--debug',
                        required=False,
                        help="Enable DEBUG-level logs",
                        default=False)
    parser.add_argument('--url',
                        required=False,
                        help="Manager URL",
                        default="https://localhost:8000")
    parser.add_argument('--username',
                        required=False,
                        help="REST API username",
                        default='admin')
    parser.add_argument('--password',
                        required=False,
                        help="REST API password",
                        default='lustre')
    parser.add_argument('--servers', help="server count", default=8, type=int)

    # Each sub-command stores a callable taking (args, simulator) in args.func.
    subparsers = parser.add_subparsers()
    subparsers.add_parser("reset").set_defaults(
        func=lambda args, simulator: Benchmark(args, simulator).reset())
    subparsers.add_parser("log_ingest_rate").set_defaults(
        func=lambda args, simulator: LogIngestRate(
            args, simulator).run_benchmark())
    subparsers.add_parser("server_count_limit").set_defaults(
        func=lambda args, simulator: ServerCountLimit(
            args, simulator).run_benchmark())
    subparsers.add_parser("concurrent_registration_limit").set_defaults(
        func=lambda args, simulator: ConcurrentRegistrationLimit(
            args, simulator).run_benchmark())
    subparsers.add_parser("filesystem_size_limit").set_defaults(
        func=lambda args, simulator: FilesystemSizeLimit(
            args, simulator).run_benchmark())

    args = parser.parse_args()

    if args.debug:
        log.setLevel(logging.DEBUG)

    if not args.remote_simulator:
        log.info("Starting simulator...")

        # Enable logging by agent code run within simulator
        from chroma_agent.log import daemon_log
        daemon_log.setLevel(logging.DEBUG)
        handler = logging.FileHandler("chroma-agent.log")
        handler.setFormatter(
            logging.Formatter('[%(asctime)s] %(message)s',
                              '%d/%b/%Y:%H:%M:%S'))
        daemon_log.addHandler(handler)
        daemon_log.info("Enabled agent logging within simulator")

        from cluster_sim.simulator import ClusterSimulator
        simulator = ClusterSimulator(folder=None, url=args.url + "/")
        simulator.power.setup(1)
        simulator.start_all()
        simulator.setup(0,
                        0,
                        0,
                        nid_count=1,
                        cluster_size=4,
                        pdu_count=1,
                        su_size=0)
    else:
        simulator = xmlrpclib.ServerProxy("http://localhost:%s" %
                                          SIMULATOR_PORT,
                                          allow_none=True)

    # Assume failure until the benchmark has actually finished, so that the
    # hard exit below reports success only on the success path.
    exit_status = -1
    try:
        log.info("Starting benchmark...")
        args.func(args, simulator)
        # Logged here because nothing after the hard exit below can run.
        log.info("Complete.")
        exit_status = 0
    except:
        # Because we do a hard exit at the end here, explicitly log terminating
        # exceptions or they would get lost. The bare except is deliberate:
        # whatever ends the run should be logged before the process goes away.
        log.error(traceback.format_exc())
        raise
    finally:
        # Do a hard exit to avoid dealing with lingering threads (not the cleanest, but
        # this isn't production code).
        os._exit(exit_status)
Exemplo n.º 3
0
    def test_oversized_messages(self):
        """
        Test that oversized messages are dropped and the session is terminated

        The POST size limit in chroma_agent.agent_client is lowered for the
        duration of the test, and client.post is replaced by a fake which
        raises HttpError for any envelope whose JSON encoding exceeds it.
        """
        # Monkey-patch this setting to a lower limit to make testing easier
        MAX_BYTES_PER_POST = 1024
        from chroma_agent.agent_client import MAX_BYTES_PER_POST as LARGE_MAX_BYTES_PER_POST

        def set_post_limit(size):
            import chroma_agent.agent_client
            chroma_agent.agent_client.MAX_BYTES_PER_POST = size

        # Register the restore before lowering the limit so the original value
        # comes back even if the test fails part-way through.
        self.addCleanup(set_post_limit, LARGE_MAX_BYTES_PER_POST)
        set_post_limit(MAX_BYTES_PER_POST)

        client = mock.Mock()
        client._fqdn = "test_server"
        client.boot_time = IMLDateTime.utcnow()
        client.start_time = IMLDateTime.utcnow()

        writer = HttpWriter(client)

        def fake_post(envelope):
            # Stand-in for the real POST: reject anything over the limit.
            if len(json.dumps(envelope)) > MAX_BYTES_PER_POST:
                daemon_log.info("fake_post(): rejecting oversized message")
                raise HttpError()

        client.post = mock.Mock(side_effect=fake_post)
        TestPlugin = mock.Mock()

        mock_plugin_instance = mock.Mock()
        mock_plugin_instance.start_session = mock.Mock(
            return_value={'foo': 'bar'})
        # Plain single-argument lambda: the parenthesised parameter form
        # "lambda (name): ..." is Python 2 only (removed by PEP 3113).
        client.device_plugins.get = mock.Mock(
            return_value=lambda plugin_name: mock_plugin_instance)
        client.device_plugins.get_plugins = mock.Mock(
            return_value={'test_plugin': TestPlugin})
        client.sessions = SessionTable(client)

        daemon_log.setLevel(logging.DEBUG)

        # A body as long as the whole limit guarantees that the envelope
        # carrying it is over the limit once serialised.
        import string
        from random import choice
        oversized_string = "".join(
            choice(string.printable) for _ in range(MAX_BYTES_PER_POST))

        # There should be one message to set up the session
        writer.poll('test_plugin')
        self.assertTrue(writer.send())
        self.assertEqual(client.post.call_count, 1)
        messages = client.post.call_args[0][0]['messages']
        self.assertEqual(len(messages), 1)
        self.assertEqual(messages[0]['type'], "SESSION_CREATE_REQUEST")
        # Pretend we got a SESSION_CREATE_RESPONSE
        client.sessions.create('test_plugin', 'id_foo')
        self.assertEqual(len(client.sessions._sessions), 1)

        # Inject a normal and an oversized message
        normal_body = DevicePluginMessage('normal', PRIO_NORMAL)
        oversized_body = DevicePluginMessage(oversized_string, PRIO_NORMAL)
        writer.put(Message("DATA", "test_plugin", normal_body, "id_foo", 0))
        writer.put(Message("DATA", "test_plugin", oversized_body, "id_foo", 1))

        # Only the normal message should get through
        self.assertTrue(writer.send())
        self.assertEqual(client.post.call_count, 2)
        messages = client.post.call_args[0][0]['messages']
        self.assertEqual(len(messages), 1)
        self.assertEqual(messages[0]['type'], "DATA")

        # The oversized message should be dropped and the session
        # terminated
        self.assertFalse(writer.send())
        self.assertEqual(client.post.call_count, 3)
        self.assertEqual(len(client.sessions._sessions), 0)

        # However, we should eventually get a new session for the
        # offending plugin
        writer.poll('test_plugin')
        self.assertTrue(writer.send())
        self.assertEqual(client.post.call_count, 4)
        messages = client.post.call_args[0][0]['messages']
        self.assertEqual(len(messages), 1)
        self.assertEqual(messages[0]['type'], "SESSION_CREATE_REQUEST")
Exemplo n.º 4
0
from blockdevice import BlockDevice
from ..lib.shell import Shell

# Prefer the agent's daemon logger; when chroma_agent is not importable, fall
# back to a module-level logger writing DEBUG output to blockdevice_zfs.log.
try:
    # FIXME: this should be avoided, implicit knowledge of something outside the package
    from chroma_agent.log import daemon_log as log
except ImportError:
    log = logging.getLogger(__name__)

    # Configure the fallback logger only if nothing has attached a handler
    # to it yet.
    if not log.handlers:
        log.setLevel(logging.DEBUG)
        handler = logging.FileHandler('blockdevice_zfs.log')
        handler.setFormatter(logging.Formatter(
            fmt="[%(asctime)s: %(levelname)s/%(name)s] %(message)s"))
        log.addHandler(handler)


class NotZpoolException(Exception):
    """Exception type of this module (presumably: the device is not a zpool)."""


class BlockDeviceZfs(BlockDevice):
    """BlockDevice subclass that declares support for the 'zfs' device type.

    NOTE(review): only the class-level constants are visible in this excerpt;
    the rest of the class body, if any, is not documented here.
    """

    # From lustre_disk.h
    # Single-bit flag values; presumably they identify the Lustre server
    # type (MDT / OST / MGS) recorded for a target - TODO confirm.
    LDD_F_SV_TYPE_MDT = 0x0001
    LDD_F_SV_TYPE_OST = 0x0002
    LDD_F_SV_TYPE_MGS = 0x0004
    # Bitwise OR of the MGS and MDT flags (0x0005).
    LDD_F_SV_TYPE_MGS_or_MDT = (LDD_F_SV_TYPE_MGS | LDD_F_SV_TYPE_MDT)

    # Device type names handled by this class; presumably consulted by the
    # BlockDevice base class when choosing an implementation - TODO confirm.
    _supported_device_types = ['zfs']
Exemplo n.º 5
0
    def test_session_backoff(self):
        """Test that when messages to the manager are being dropped due to POST failure,
        sending SESSION_CREATE_REQUEST messages has a power-of-two backoff wait

        datetime.datetime is replaced by a mock for the duration of the test so
        that the time seen through datetime.datetime.now() can be moved forward
        explicitly; the real class is restored in the finally clause.
        """
        client = mock.Mock()
        client._fqdn = "test_server"
        client.boot_time = IMLDateTime.utcnow()
        client.start_time = IMLDateTime.utcnow()

        writer = client.writer = HttpWriter(client)
        reader = client.reader = HttpReader(client)

        daemon_log.setLevel(logging.DEBUG)

        TestPlugin = mock.Mock()

        mock_plugin_instance = mock.Mock()
        mock_plugin_instance.start_session = mock.Mock(
            return_value={'foo': 'bar'})
        # Plain single-argument lambda: the parenthesised parameter form
        # "lambda (name): ..." is Python 2 only (removed by PEP 3113).
        client.device_plugins.get = mock.Mock(
            return_value=lambda plugin_name: mock_plugin_instance)
        client.device_plugins.get_plugins = mock.Mock(
            return_value={'test_plugin': TestPlugin})
        client.sessions = SessionTable(client)

        # Every POST fails until the mock is swapped out in stage 2.
        client.post = mock.Mock(side_effect=HttpError())

        # Pick an arbitrary time to use as a base for simulated waits
        t_0 = datetime.datetime.now()

        old_datetime = datetime.datetime
        try:

            def expect_message_at(t):
                # Just before t a poll must not queue anything...
                datetime.datetime.now = mock.Mock(
                    return_value=t - datetime.timedelta(seconds=0.1))
                writer.poll('test_plugin')
                self.assertEqual(writer._messages.qsize(), 0)

                # ...and just after t a poll must queue exactly one message.
                datetime.datetime.now = mock.Mock(
                    return_value=t + datetime.timedelta(seconds=0.1))
                writer.poll('test_plugin')
                self.assertEqual(writer._messages.qsize(), 1)

            datetime.datetime = mock.Mock()
            datetime.datetime.now = mock.Mock(return_value=t_0)

            # Stage 1: failing to POST, backing off
            # =====================================

            # Poll should put some session creation messages
            writer.poll('test_plugin')
            self.assertEqual(writer._messages.qsize(), 1)
            # Another poll immediately after shouldn't add any messages (MIN_SESSION_BACKOFF hasn't passed)
            writer.poll('test_plugin')
            self.assertEqual(writer._messages.qsize(), 1)

            # Send should consume the messages, and they go to nowhere because the POST fails
            writer.send()
            # Check call_count explicitly: on older mock releases
            # assert_called_once() is an auto-created attribute that
            # silently passes.
            self.assertEqual(client.post.call_count, 1)
            self.assertEqual(len(client.post.call_args[0][0]['messages']), 1)

            # First time boundary: where the first repeat should happen
            from chroma_agent.agent_client import MIN_SESSION_BACKOFF
            t_1 = t_0 + MIN_SESSION_BACKOFF

            expect_message_at(t_1)

            # Have another crack at sending, it should fail and empty the queue
            writer.send()
            self.assertTrue(writer._messages.empty())

            # Second time boundary: where the second repeat should happen
            t_2 = t_1 + MIN_SESSION_BACKOFF * 2
            expect_message_at(t_2)

            # Stage 2: success in POST, session creation
            # ==========================================

            # This time we'll let the message go through, and a session to begin.
            client.post = mock.Mock()
            writer.send()

            # HttpReader receives a response from the manager, and should reset the backoff counters.
            reader._handle_messages([{
                'type': 'SESSION_CREATE_RESPONSE',
                'plugin': 'test_plugin',
                'session_id': 'id_foo',
                'session_seq': 0,
                'body': {}
            }])
            self.assertEqual(len(client.sessions._sessions), 1)
            session = client.sessions.get('test_plugin')

            # Stage 3: POSTs start failing again, see delay again
            # ===================================================

            # Break the POST link again
            client.post = mock.Mock(side_effect=HttpError())

            # Poll will get a DATA message from initial_scan
            session.initial_scan = mock.Mock(return_value={'foo': 'bar'})
            writer.poll('test_plugin')
            self.assertEqual(session.initial_scan.call_count, 1)
            self.assertFalse(writer._messages.empty())

            # Send will fail to send it, and as a result destroy the session
            writer.send()
            self.assertTrue(writer._messages.empty())
            self.assertEqual(len(client.sessions._sessions), 0)

            # Move to some point beyond the first backoff cycle
            t_3 = t_0 + datetime.timedelta(seconds=60)
            datetime.datetime.now = mock.Mock(return_value=t_3)

            writer.poll('test_plugin')
            self.assertEqual(writer._messages.qsize(), 1)
            writer.poll('test_plugin')
            self.assertEqual(writer._messages.qsize(), 1)
            writer.send()
            self.assertEqual(writer._messages.qsize(), 0)

            # Check the backoff time has gone back to MIN_SESSION_BACKOFF
            t_4 = t_3 + MIN_SESSION_BACKOFF
            expect_message_at(t_4)
        finally:
            # Always put the real datetime class back, even on failure.
            datetime.datetime = old_datetime