示例#1
0
class EverythingRunsTest(Test):
    """Sanity check that the various core services can all be run together."""

    def __init__(self, test_context):
        """Construct the services exercised by this test.

        :type test_context: ducktape.tests.test.TestContext
        """
        super(EverythingRunsTest, self).__init__(test_context=test_context)

        # Each service is handed the services that were constructed before it.
        self.zk = ZookeeperService(test_context, num_nodes=2)
        self.kafka = KafkaService(test_context, 1, self.zk)
        self.schema_registry = SchemaRegistryService(
            test_context, 1, self.zk, self.kafka)
        self.rest_proxy = KafkaRestService(
            test_context, 1, self.zk, self.kafka, self.schema_registry)

        # Driver that talks to the schema registry constructed above.
        self.register_driver = RegisterSchemasService(
            test_context, 1, self.schema_registry,
            retry_wait_sec=.02, num_tries=5, max_time_seconds=10, max_schemas=50)

    def test(self):
        """Start the services in construction order, then run the register driver."""
        for service in (self.zk, self.kafka, self.schema_registry, self.rest_proxy):
            service.start()

        self.register_driver.start()
        # wait() blocks until register_driver finishes
        self.register_driver.wait()
示例#2
0
    def __init__(self,
                 test_context,
                 num_zk,
                 num_brokers,
                 num_schema_registry,
                 retry_wait_sec=.2,
                 num_retries=10):
        """Store the retry settings and create the schema registration driver.

        ``test_context``, ``num_zk``, ``num_brokers`` and
        ``num_schema_registry`` are forwarded unchanged to the parent
        constructor.

        :param retry_wait_sec: time to wait between registration retries
        :param num_retries: number of attempted retries
        """
        super(SchemaRegistryFailoverTest, self).__init__(
            test_context, num_zk, num_brokers, num_schema_registry)

        # Retry settings are kept on the instance so they can be reported later.
        self.num_retries = num_retries
        self.retry_wait_sec = retry_wait_sec

        self.register_driver = RegisterSchemasService(
            test_context,
            1,
            self.schema_registry,
            self.retry_wait_sec,
            self.num_retries,
            max_time_seconds=900)
示例#3
0
    def __init__(self, test_context):
        """Construct the services exercised by this test.

        :type test_context: ducktape.tests.test.TestContext
        """
        super(EverythingRunsTest, self).__init__(test_context=test_context)

        # Each service is handed the services that were constructed before it.
        self.zk = ZookeeperService(test_context, num_nodes=2)
        self.kafka = KafkaService(test_context, 1, self.zk)
        self.schema_registry = SchemaRegistryService(
            test_context, 1, self.zk, self.kafka)
        self.rest_proxy = KafkaRestService(
            test_context, 1, self.zk, self.kafka, self.schema_registry)

        # Driver that talks to the schema registry constructed above.
        self.register_driver = RegisterSchemasService(
            test_context, 1, self.schema_registry,
            retry_wait_sec=.02, num_tries=5, max_time_seconds=10, max_schemas=50)
    def __init__(self, test_context):
        """Build every service this test needs.

        :type test_context: ducktape.tests.test.TestContext
        """
        super(EverythingRunsTest, self).__init__(test_context=test_context)

        # Services are created in dependency order: later ones receive earlier ones.
        self.zk = ZookeeperService(test_context, num_nodes=2)
        self.kafka = KafkaService(test_context, 1, self.zk)
        self.schema_registry = SchemaRegistryService(test_context,
                                                     1,
                                                     self.zk,
                                                     self.kafka)
        self.rest_proxy = KafkaRestService(test_context,
                                           1,
                                           self.zk,
                                           self.kafka,
                                           self.schema_registry)

        # Driver that talks to the schema registry created above.
        self.register_driver = RegisterSchemasService(test_context,
                                                      1,
                                                      self.schema_registry,
                                                      retry_wait_sec=.02,
                                                      num_tries=5,
                                                      max_time_seconds=10,
                                                      max_schemas=50)
示例#5
0
    def __init__(self,
                 test_context,
                 num_zk,
                 num_brokers,
                 num_schema_registry,
                 retry_wait_sec=.2,
                 num_retries=10):
        """Remember the retry configuration and build the registration driver.

        The first four arguments are passed straight through to the parent
        constructor.

        :param retry_wait_sec: time to wait between registration retries
        :param num_retries: number of attempted retries
        """
        super(SchemaRegistryFailoverTest, self).__init__(
            test_context, num_zk, num_brokers, num_schema_registry)

        # Both settings are stored on the instance as well as given to the driver.
        self.num_retries = num_retries
        self.retry_wait_sec = retry_wait_sec

        self.register_driver = RegisterSchemasService(
            test_context,
            1,
            self.schema_registry,
            self.retry_wait_sec,
            self.num_retries,
            max_time_seconds=900)
class EverythingRunsTest(Test):
    """Sanity check that the various core services can all be run together."""

    def __init__(self, test_context):
        """Build every service this test needs.

        :type test_context: ducktape.tests.test.TestContext
        """
        super(EverythingRunsTest, self).__init__(test_context=test_context)

        # Services are created in dependency order: later ones receive earlier ones.
        self.zk = ZookeeperService(test_context, num_nodes=2)
        self.kafka = KafkaService(test_context, 1, self.zk)
        self.schema_registry = SchemaRegistryService(test_context,
                                                     1,
                                                     self.zk,
                                                     self.kafka)
        self.rest_proxy = KafkaRestService(test_context,
                                           1,
                                           self.zk,
                                           self.kafka,
                                           self.schema_registry)

        # Driver that talks to the schema registry created above.
        self.register_driver = RegisterSchemasService(test_context,
                                                      1,
                                                      self.schema_registry,
                                                      retry_wait_sec=.02,
                                                      num_tries=5,
                                                      max_time_seconds=10,
                                                      max_schemas=50)

    def test(self):
        """Start the services in construction order, then run the register driver."""
        for service in (self.zk, self.kafka, self.schema_registry, self.rest_proxy):
            service.start()

        self.register_driver.start()
        # wait() blocks until register_driver finishes
        self.register_driver.wait()
示例#7
0
class SchemaRegistryFailoverTest(SchemaRegistryTest):
    """Base test that registers schemas while a subclass drives failures.

    ``test`` starts a ``RegisterSchemasService`` driver, calls
    ``drive_failures`` (which subclasses must implement), stops the driver and
    then calls ``report_summary``, which checks what the driver reported
    against what can be fetched back from the schema registry.
    """

    def __init__(self, test_context, num_zk, num_brokers, num_schema_registry, retry_wait_sec=.2, num_retries=10):
        """Store the retry settings and create the schema registration driver.

        ``test_context``, ``num_zk``, ``num_brokers`` and
        ``num_schema_registry`` are forwarded unchanged to the parent
        constructor.

        :param retry_wait_sec: time to wait between registration retries
        :param num_retries: number of attempted retries
        """
        super(SchemaRegistryFailoverTest, self).__init__(test_context, num_zk, num_brokers, num_schema_registry)

        # Time to wait between registration retries
        self.retry_wait_sec = retry_wait_sec

        # Number of attempted retries
        self.num_retries = num_retries

        self.register_driver = RegisterSchemasService(
            test_context, 1, self.schema_registry,
            self.retry_wait_sec,
            self.num_retries, max_time_seconds=900)

    def setUp(self):
        """Delegate to the parent ``setUp``."""
        super(SchemaRegistryFailoverTest, self).setUp()

    def drive_failures(self):
        """Hook for subclasses: perform the kill or bounce logic of the test.

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError("drive_failures must be implemented by a subclass.")

    def _master_base_url(self):
        """Return the external URL of the current schema registry master node."""
        master_id = self.schema_registry.idx(self.schema_registry.get_master_node())
        return self.schema_registry.url(master_id, external=True)

    def report_summary(self):
        """Build a textual summary of the run, assert overall success and log it.

        The summary contains the driver's statistics followed by the outcome
        of each validation.  Every validation reports its own
        "Success."/"Failure." line; the overall result is the conjunction of
        all of them.

        :raises AssertionError: with the summary as message if any check failed
        """
        driver = self.register_driver

        # Gather statistics
        successful_records = [record for record in driver.registration_data if record["success"]]
        attempted = driver.num_attempted_registrations
        succeeded = len(successful_records)
        # Avoid a ZeroDivisionError when the driver never attempted anything
        success_ratio = succeeded / float(attempted) if attempted else 0.0

        summary = "\n-------------------------------------------------------------------\n"
        summary += "Summary\n"
        summary += "Histogram of number of attempts needed to successfully register:\n"
        summary += str(driver.try_histogram) + "\n"
        summary += "Attempted to register %d schemas. " % attempted + "\n"
        summary += "Max registration attempts allowed: %d\n" % self.num_retries
        summary += "Retry backoff: %f seconds\n" % self.retry_wait_sec
        summary += "Successful: %d/%d = %f\n" % (succeeded, attempted, success_ratio)

        # Verify that all ids reported as successfully registered can be fetched
        base_url = self._master_base_url()
        registered_ids = [record["schema_id"] for record in successful_records]
        summary += "Validating that schemas reported as successful can be fetched by id...\n"
        fetch_ok = True
        for schema_id in registered_ids:
            try:
                get_schema_by_id(base_url, schema_id)
            except Exception:
                fetch_ok = False
                summary += "%d was reported successful but actually failed\n" % schema_id
        summary += "Success.\n" if fetch_ok else "Failure.\n"

        # Verify that number of versions fetched matches number of registered ids
        versions = get_all_versions(base_url, driver.subject)
        summary += \
            "Validating that number of reported successful registrations matches number of versions in subject...\n"
        # Report this check on its own result, not on the outcome of the previous check
        count_ok = len(versions) == len(registered_ids)
        summary += "Success.\n" if count_ok else "Failure.\n"

        success = fetch_ok and count_ok

        # Each validator returns {"success": bool, "message": str}
        for validate in (self.validate_schema_consistency,
                         self.validate_registered_vs_subjectversion,
                         self.validate_registered_vs_subjectschema):
            results = validate()
            summary += results["message"] + "\n"
            success = success and results["success"]

        summary += "-------------------------------------------------------------------\n"
        assert success, summary
        self.logger.info(summary)

    def normalize_schema_string(self, schema_string):
        """Return ``schema_string`` re-serialized through a JSON load/dump round trip."""
        return json.dumps(json.loads(schema_string))

    def get_ids_and_schemas_registered(self):
        """
        Return all pairs (id, schema) that reported as successfully registered by the register schemas service.
        """
        registration_data = self.register_driver.registration_data
        return {(record["schema_id"], record["schema_string"])
                for record in registration_data if record["success"]}

    def fetch_ids_and_schemas_by_subjectschema(self, reported_records):
        """
        Return all pairs (id, schema) that can be fetched by subject/schema, for every (id, schema) pair in
        reported_records. The id of each returned pair is the one returned by the lookup, not the reported one.
        """
        base_url = self._master_base_url()
        subject = self.register_driver.subject

        stored_records = set()
        for _, schema in reported_records:
            stored_id = get_by_schema(base_url, schema, subject)["id"]
            stored_records.add((stored_id, schema))
        return stored_records

    def fetch_ids_and_schemas_by_subjectversion(self):
        """
        Return all pairs (id, schema) that can be fetched by subject/version, for all versions listed under the subject.
        The schema of each pair is normalized with normalize_schema_string.

        :raises Exception: listing the versions that could not be fetched, if any
        """
        base_url = self._master_base_url()
        subject = self.register_driver.subject
        versions = get_all_versions(base_url, subject)

        fetched_ids_and_schemas = []
        failed_versions = []
        for version in versions:
            try:
                fetched_schema_info = get_schema_by_version(base_url, subject, version)
                fetched_schema_string = self.normalize_schema_string(fetched_schema_info["schema"])
                fetched_ids_and_schemas.append((fetched_schema_info["id"], fetched_schema_string))
            except Exception:
                # Keep going so that every failing version is reported at once
                failed_versions.append(version)
        if failed_versions:
            raise Exception("Failed to fetch versions: " + str(failed_versions))

        return fetched_ids_and_schemas

    def validate_registered_vs_subjectschema(self):
        """
        Check successfully registered against schemas fetched by subject/schema

        :return: dict with a boolean "success" and a human readable "message"
        """
        def first_field_name(schema_string):
            return json.loads(schema_string)["fields"][0]["name"]

        reported = self.get_ids_and_schemas_registered()
        stored = self.fetch_ids_and_schemas_by_subjectschema(reported)

        # Compare on (id, name of the schema's first field) rather than on the whole schema string
        registered_ids_and_schemas = {(schema_id, first_field_name(schema)) for schema_id, schema in reported}
        fetched_ids_and_schemas = {(schema_id, first_field_name(schema)) for schema_id, schema in stored}

        message = "Validating successfully registered ids agains ids fetched by subject/schema...\n"
        success = True

        registered_not_fetched = registered_ids_and_schemas - fetched_ids_and_schemas
        if registered_not_fetched:
            success = False
            message += "There are registered ids which were not fetched: " + str(registered_not_fetched) + "\n"

        fetched_not_registered = fetched_ids_and_schemas - registered_ids_and_schemas
        if fetched_not_registered:
            success = False
            message += "There are fetched ids which were not registered: " + str(fetched_not_registered) + "\n"

        message += "Success." if success else "Failure."
        return {"success": success, "message": message}

    def validate_registered_vs_subjectversion(self):
        """
        Check successfully registered against schemas fetched by subject/version

        :return: dict with a boolean "success" and a human readable "message"
        """

        # Validate by fetching versions
        message = "Validating that successfully registered ids and schemas match ids and schemas " + \
                  "fetched by subject/version...\n"

        success = True
        try:
            fetched_ids_and_schemas = set(self.fetch_ids_and_schemas_by_subjectversion())
        except Exception:
            fetched_ids_and_schemas = set()
            message += "Problem fetching by subject/version\n"
            success = False

        # The fetched schema strings are normalized, so normalize the reported ones the same way;
        # otherwise a pure formatting difference would be reported as a mismatch.
        registered_ids_and_schemas = {
            (schema_id, self.normalize_schema_string(schema))
            for schema_id, schema in self.get_ids_and_schemas_registered()}

        registered_not_fetched = registered_ids_and_schemas - fetched_ids_and_schemas
        if registered_not_fetched:
            message += "Some registered ids were not fetched by subject/version: " + str(registered_not_fetched) + "\n"
            success = False

        fetched_not_registered = fetched_ids_and_schemas - registered_ids_and_schemas
        if fetched_not_registered:
            message += "Some ids fetched by subject/version were not reported as successfully registered: " + str(fetched_not_registered) + "\n"
            success = False

        message += "Success." if success else "Failure."
        return {"success": success, "message": message}

    def validate_schema_consistency(self):
        """
        Much of the use case involves
        a) register a schema, get back an id
        b) sometime later, someone else fetches the schema by id

        Therefore, verify that the id we get back for registering a particular schema still gets us
        back that same particular schema.

        :return: dict with a boolean "success" and a human readable "message"
        """
        base_url = self._master_base_url()

        message = "Validating that registered schemas match fetched schemas...\n"
        discrepancies = []
        success = True
        for datum in self.register_driver.registration_data:
            schema_id = datum["schema_id"]
            schema = self.normalize_schema_string(datum["schema_string"])

            try:
                found_schema = self.normalize_schema_string(get_schema_by_id(base_url, schema_id)["schema"])
            except Exception:
                success = False
                # %s renders integers exactly like %d but cannot itself raise for a non-integer id
                message += "Failed to fetch id %s. " % schema_id
                # Nothing was fetched for this id, so there is nothing to compare. Without this,
                # found_schema would be unbound or left over from the previous iteration.
                continue

            if found_schema != schema:
                discrepancies.append((schema_id, schema, found_schema))

        if discrepancies:
            success = False
            message += "Found discrepencies between registered schemas and fetched schemas (id, registered, fetched). "
            message += str(discrepancies)

        message += "Success." if success else "Failure."
        return {"success": success, "message": message}

    def test(self):
        """Register schemas in the background, drive failures, then validate the outcome."""
        # start schema registration in the background
        self.logger.info("Starting registration thread(s)")
        self.register_driver.start()

        # Make sure registrations have started
        while self.register_driver.num_attempted_registrations < 2:
            time.sleep(.5)

        # do the kill or bounce logic
        self.logger.info("Driving failures")
        self.drive_failures()
        self.logger.info("Finished driving failures.")

        # Wait a little before stopping registration
        num_attempted = self.register_driver.num_attempted_registrations
        while self.register_driver.num_attempted_registrations < num_attempted + 2:
            time.sleep(.5)

            # The driver has already flagged itself as finishing; no further attempts will arrive
            if self.register_driver.ready_to_finish:
                self.logger.info("May have reached maximum registration time or maximum number of " +
                                 "registered schemas before finishing drive_failures.")
                break

        self.logger.info("Ending registration...")
        self.register_driver.ready_to_finish = True
        self.register_driver.wait()
        self.register_driver.stop()

        self.report_summary()
        time.sleep(10)
示例#8
0
class SchemaRegistryFailoverTest(SchemaRegistryTest):
    """Base test that registers schemas while a subclass drives failures.

    ``test`` starts a ``RegisterSchemasService`` driver, calls
    ``drive_failures`` (which subclasses must implement), stops the driver and
    then calls ``report_summary``, which checks what the driver reported
    against what can be fetched back from the schema registry.
    """

    def __init__(self,
                 test_context,
                 num_zk,
                 num_brokers,
                 num_schema_registry,
                 retry_wait_sec=.2,
                 num_retries=10):
        """Store the retry settings and create the schema registration driver.

        ``test_context``, ``num_zk``, ``num_brokers`` and
        ``num_schema_registry`` are forwarded unchanged to the parent
        constructor.

        :param retry_wait_sec: time to wait between registration retries
        :param num_retries: number of attempted retries
        """
        super(SchemaRegistryFailoverTest,
              self).__init__(test_context, num_zk, num_brokers,
                             num_schema_registry)

        # Time to wait between registration retries
        self.retry_wait_sec = retry_wait_sec

        # Number of attempted retries
        self.num_retries = num_retries

        self.register_driver = RegisterSchemasService(test_context,
                                                      1,
                                                      self.schema_registry,
                                                      self.retry_wait_sec,
                                                      self.num_retries,
                                                      max_time_seconds=900)

    def setUp(self):
        """Delegate to the parent ``setUp``."""
        super(SchemaRegistryFailoverTest, self).setUp()

    def drive_failures(self):
        """Hook for subclasses: perform the kill or bounce logic of the test.

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError(
            "drive_failures must be implemented by a subclass.")

    def _master_base_url(self):
        """Return the external URL of the current schema registry master node."""
        master_id = self.schema_registry.idx(
            self.schema_registry.get_master_node())
        return self.schema_registry.url(master_id, external=True)

    def report_summary(self):
        """Build a textual summary of the run, assert overall success and log it.

        The summary contains the driver's statistics followed by the outcome
        of each validation.  Every validation reports its own
        "Success."/"Failure." line; the overall result is the conjunction of
        all of them.

        :raises AssertionError: with the summary as message if any check failed
        """
        driver = self.register_driver

        # Gather statistics
        successful_records = [
            record for record in driver.registration_data
            if record["success"]
        ]
        attempted = driver.num_attempted_registrations
        succeeded = len(successful_records)
        # Avoid a ZeroDivisionError when the driver never attempted anything
        success_ratio = succeeded / float(attempted) if attempted else 0.0

        summary = "\n-------------------------------------------------------------------\n"
        summary += "Summary\n"
        summary += "Histogram of number of attempts needed to successfully register:\n"
        summary += str(driver.try_histogram) + "\n"
        summary += "Attempted to register %d schemas. " % attempted + "\n"
        summary += "Max registration attempts allowed: %d\n" % self.num_retries
        summary += "Retry backoff: %f seconds\n" % self.retry_wait_sec
        summary += "Successful: %d/%d = %f\n" % (succeeded, attempted,
                                                 success_ratio)

        # Verify that all ids reported as successfully registered can be fetched
        base_url = self._master_base_url()
        registered_ids = [record["schema_id"] for record in successful_records]
        summary += "Validating that schemas reported as successful can be fetched by id...\n"
        fetch_ok = True
        for schema_id in registered_ids:
            try:
                get_schema_by_id(base_url, schema_id)
            except Exception:
                fetch_ok = False
                summary += "%d was reported successful but actually failed\n" % schema_id
        summary += "Success.\n" if fetch_ok else "Failure.\n"

        # Verify that number of versions fetched matches number of registered ids
        versions = get_all_versions(base_url, driver.subject)
        summary += \
            "Validating that number of reported successful registrations matches number of versions in subject...\n"
        # Report this check on its own result, not on the outcome of the previous check
        count_ok = len(versions) == len(registered_ids)
        summary += "Success.\n" if count_ok else "Failure.\n"

        success = fetch_ok and count_ok

        # Each validator returns {"success": bool, "message": str}
        for validate in (self.validate_schema_consistency,
                         self.validate_registered_vs_subjectversion,
                         self.validate_registered_vs_subjectschema):
            results = validate()
            summary += results["message"] + "\n"
            success = success and results["success"]

        summary += "-------------------------------------------------------------------\n"
        assert success, summary
        self.logger.info(summary)

    def normalize_schema_string(self, schema_string):
        """Return ``schema_string`` re-serialized through a JSON load/dump round trip."""
        return json.dumps(json.loads(schema_string))

    def get_ids_and_schemas_registered(self):
        """
        Return all pairs (id, schema) that reported as successfully registered by the register schemas service.
        """
        registration_data = self.register_driver.registration_data
        return {(record["schema_id"], record["schema_string"])
                for record in registration_data if record["success"]}

    def fetch_ids_and_schemas_by_subjectschema(self, reported_records):
        """
        Return all pairs (id, schema) that can be fetched by subject/schema, for every (id, schema) pair in
        reported_records. The id of each returned pair is the one returned by the lookup, not the reported one.
        """
        base_url = self._master_base_url()
        subject = self.register_driver.subject

        stored_records = set()
        for _, schema in reported_records:
            stored_id = get_by_schema(base_url, schema, subject)["id"]
            stored_records.add((stored_id, schema))
        return stored_records

    def fetch_ids_and_schemas_by_subjectversion(self):
        """
        Return all pairs (id, schema) that can be fetched by subject/version, for all versions listed under the subject.
        The schema of each pair is normalized with normalize_schema_string.

        :raises Exception: listing the versions that could not be fetched, if any
        """
        base_url = self._master_base_url()
        subject = self.register_driver.subject
        versions = get_all_versions(base_url, subject)

        fetched_ids_and_schemas = []
        failed_versions = []
        for version in versions:
            try:
                fetched_schema_info = get_schema_by_version(
                    base_url, subject, version)
                fetched_schema_string = self.normalize_schema_string(
                    fetched_schema_info["schema"])
                fetched_ids_and_schemas.append(
                    (fetched_schema_info["id"], fetched_schema_string))
            except Exception:
                # Keep going so that every failing version is reported at once
                failed_versions.append(version)
        if failed_versions:
            raise Exception("Failed to fetch versions: " +
                            str(failed_versions))

        return fetched_ids_and_schemas

    def validate_registered_vs_subjectschema(self):
        """
        Check successfully registered against schemas fetched by subject/schema

        :return: dict with a boolean "success" and a human readable "message"
        """
        def first_field_name(schema_string):
            return json.loads(schema_string)["fields"][0]["name"]

        reported = self.get_ids_and_schemas_registered()
        stored = self.fetch_ids_and_schemas_by_subjectschema(reported)

        # Compare on (id, name of the schema's first field) rather than on the whole schema string
        registered_ids_and_schemas = {
            (schema_id, first_field_name(schema))
            for schema_id, schema in reported
        }
        fetched_ids_and_schemas = {
            (schema_id, first_field_name(schema))
            for schema_id, schema in stored
        }

        message = "Validating successfully registered ids agains ids fetched by subject/schema...\n"
        success = True

        registered_not_fetched = registered_ids_and_schemas - fetched_ids_and_schemas
        if registered_not_fetched:
            success = False
            message += "There are registered ids which were not fetched: " + str(
                registered_not_fetched) + "\n"

        fetched_not_registered = fetched_ids_and_schemas - registered_ids_and_schemas
        if fetched_not_registered:
            success = False
            message += "There are fetched ids which were not registered: " + str(
                fetched_not_registered) + "\n"

        message += "Success." if success else "Failure."
        return {"success": success, "message": message}

    def validate_registered_vs_subjectversion(self):
        """
        Check successfully registered against schemas fetched by subject/version

        :return: dict with a boolean "success" and a human readable "message"
        """

        # Validate by fetching versions
        message = "Validating that successfully registered ids and schemas match ids and schemas " + \
                  "fetched by subject/version...\n"

        success = True
        try:
            fetched_ids_and_schemas = set(
                self.fetch_ids_and_schemas_by_subjectversion())
        except Exception:
            fetched_ids_and_schemas = set()
            message += "Problem fetching by subject/version\n"
            success = False

        # The fetched schema strings are normalized, so normalize the reported ones the same way;
        # otherwise a pure formatting difference would be reported as a mismatch.
        registered_ids_and_schemas = {
            (schema_id, self.normalize_schema_string(schema))
            for schema_id, schema in self.get_ids_and_schemas_registered()
        }

        registered_not_fetched = registered_ids_and_schemas - fetched_ids_and_schemas
        if registered_not_fetched:
            message += "Some registered ids were not fetched by subject/version: " + str(
                registered_not_fetched) + "\n"
            success = False

        fetched_not_registered = fetched_ids_and_schemas - registered_ids_and_schemas
        if fetched_not_registered:
            message += "Some ids fetched by subject/version were not reported as successfully registered: " + str(
                fetched_not_registered) + "\n"
            success = False

        message += "Success." if success else "Failure."
        return {"success": success, "message": message}

    def validate_schema_consistency(self):
        """
        Much of the use case involves
        a) register a schema, get back an id
        b) sometime later, someone else fetches the schema by id

        Therefore, verify that the id we get back for registering a particular schema still gets us
        back that same particular schema.

        :return: dict with a boolean "success" and a human readable "message"
        """
        base_url = self._master_base_url()

        message = "Validating that registered schemas match fetched schemas...\n"
        discrepancies = []
        success = True
        for datum in self.register_driver.registration_data:
            schema_id = datum["schema_id"]
            schema = self.normalize_schema_string(datum["schema_string"])

            try:
                found_schema = self.normalize_schema_string(
                    get_schema_by_id(base_url, schema_id)["schema"])
            except Exception:
                success = False
                # %s renders integers exactly like %d but cannot itself raise for a non-integer id
                message += "Failed to fetch id %s. " % schema_id
                # Nothing was fetched for this id, so there is nothing to compare. Without this,
                # found_schema would be unbound or left over from the previous iteration.
                continue

            if found_schema != schema:
                discrepancies.append((schema_id, schema, found_schema))

        if discrepancies:
            success = False
            message += "Found discrepencies between registered schemas and fetched schemas (id, registered, fetched). "
            message += str(discrepancies)

        message += "Success." if success else "Failure."
        return {"success": success, "message": message}

    def test(self):
        """Register schemas in the background, drive failures, then validate the outcome."""
        # start schema registration in the background
        self.logger.info("Starting registration thread(s)")
        self.register_driver.start()

        # Make sure registrations have started
        while self.register_driver.num_attempted_registrations < 2:
            time.sleep(.5)

        # do the kill or bounce logic
        self.logger.info("Driving failures")
        self.drive_failures()
        self.logger.info("Finished driving failures.")

        # Wait a little before stopping registration
        num_attempted = self.register_driver.num_attempted_registrations
        while self.register_driver.num_attempted_registrations < num_attempted + 2:
            time.sleep(.5)

            # The driver has already flagged itself as finishing; no further attempts will arrive
            if self.register_driver.ready_to_finish:
                self.logger.info(
                    "May have reached maximum registration time or maximum number of "
                    + "registered schemas before finishing drive_failures.")
                break

        self.logger.info("Ending registration...")
        self.register_driver.ready_to_finish = True
        self.register_driver.wait()
        self.register_driver.stop()

        self.report_summary()
        time.sleep(10)