def alter_topic_config(self, topic: str, key: str,
                       value: typing.Union[str, int]):
    """
    Set one configuration property on a topic.

    The call is forwarded to ``RpkTool.alter_topic_config`` on a tool
    instance built from ``self._redpanda``.

    :param topic: name of the topic to modify
    :param key: configuration property name
    :param value: new value for the property
    """
    RpkTool(self._redpanda).alter_topic_config(topic, key, value)
def test_recovery_after_multiple_restarts(self):
    """
    Restart every node ten times while a producer and consumer are
    running, waiting after each round until no partition reports as
    under-replicated, then validate the produced/consumed data.
    """
    self.start_redpanda(3, extra_rp_conf=self._extra_rp_conf)

    topic_spec = TopicSpec(partition_count=60, replication_factor=3)
    DefaultClient(self.redpanda).create_topic(topic_spec)
    self.topic = topic_spec.name

    # Turn on remote write first, then remote read, for the new topic.
    rpk = RpkTool(self.redpanda)
    for prop in ('redpanda.remote.write', 'redpanda.remote.read'):
        rpk.alter_topic_config(topic_spec.name, prop, 'true')

    self.start_producer(1, throughput=100)
    self.start_consumer(1)
    self.await_startup()

    def no_under_replicated_partitions():
        # True only when every "under_replicated" sample is <= 0.
        samples = self.redpanda.metrics_sample("under_replicated").samples
        return not any(sample.value > 0 for sample in samples)

    # restart all the nodes and wait for recovery
    for _ in range(10):
        for node in self.redpanda.nodes:
            self.redpanda.signal_redpanda(node)
            self.redpanda.start_node(node)
        wait_until(no_under_replicated_partitions, 30, 2)

    self.run_validation(enable_idempotence=False,
                        producer_timeout_sec=60,
                        consumer_timeout_sec=180)
def test_overrides_set(self):
    """
    Check that topic-level shadow indexing overrides can be changed.

    The first topic in ``self.topics`` is expected to start with both
    ``redpanda.remote.read`` and ``redpanda.remote.write`` reported as
    "true". Both properties are then set to "false" and the described
    configuration is checked again.
    """
    topic = self.topics[0].name
    rpk = RpkTool(self.redpanda)

    original_output = rpk.describe_topic_configs(topic)
    self.logger.info(f"original_output={original_output}")
    assert original_output["redpanda.remote.read"][0] == "true"
    assert original_output["redpanda.remote.write"][0] == "true"

    # Disable BOTH properties. Previously remote.read was altered twice
    # and remote.write was never touched, so the final assertion on
    # remote.write could not be satisfied by this test's own actions.
    rpk.alter_topic_config(topic, "redpanda.remote.read", "false")
    rpk.alter_topic_config(topic, "redpanda.remote.write", "false")

    altered_output = rpk.describe_topic_configs(topic)
    self.logger.info(f"altered_output={altered_output}")
    assert altered_output["redpanda.remote.read"][0] == "false"
    assert altered_output["redpanda.remote.write"][0] == "false"
class ShadowIndexingFirewallTest(RedpandaTest):
    """
    Checks how consuming behaves when the cluster's S3 port is blocked
    by a firewall after local segments have been removed by retention.
    """
    log_segment_size = 1024 * 1024  # 1MB
    retention_bytes = 1024  # 1 KB
    s3_topic_name = "panda-topic"
    topics = (TopicSpec(name=s3_topic_name,
                        partition_count=1,
                        replication_factor=3), )

    def __init__(self, test_context):
        """Configure shadow indexing settings and create the rpk helper."""
        settings = SISettings(cloud_storage_reconciliation_interval_ms=500,
                              cloud_storage_max_connections=5,
                              log_segment_size=self.log_segment_size)
        super().__init__(test_context=test_context, si_settings=settings)

        # Port that gets firewalled in the test below.
        self._s3_port = settings.cloud_storage_api_endpoint_port
        self.rpk = RpkTool(self.redpanda)

    @cluster(num_nodes=3, log_allow_list=CONNECTION_ERROR_LOGS)
    def test_consume_from_blocked_s3(self):
        topic_name = self.s3_topic_name

        produce_until_segments(redpanda=self.redpanda,
                               topic=topic_name,
                               partition_idx=0,
                               count=5,
                               acks=-1)

        # Shrink retention and wait for the segment count to drop.
        self.rpk.alter_topic_config(topic_name,
                                    TopicSpec.PROPERTY_RETENTION_BYTES,
                                    self.retention_bytes)
        wait_for_segments_removal(redpanda=self.redpanda,
                                  topic=topic_name,
                                  partition_idx=0,
                                  count=4)

        # Disconnect redpanda from S3 and try to read starting with offset 0
        with firewall_blocked(self.redpanda.nodes, self._s3_port):
            try:
                out = self.rpk.consume(topic=topic_name)
            except RpkException as err:
                assert 'timed out' in err.msg
            else:
                # Consuming succeeded although S3 was unreachable.
                raise RuntimeError(
                    f"RPK consume should have timed out, but ran with output: {out}"
                )
def test_recovery_after_multiple_restarts(self):
    """
    Same restart-and-recover scenario as a plain multi-restart test, but
    with the cluster started with shadow indexing settings: restart all
    nodes ten times, wait for under-replication to clear after each
    round, then validate the produced/consumed data.
    """
    # If a debug build has to do a restart across a significant
    # number of partitions, it gets slow. Use fewer partitions
    # on debug builds.
    if self.debug_mode:
        partition_count = 10
    else:
        partition_count = 60

    settings = SISettings(cloud_storage_reconciliation_interval_ms=500,
                          cloud_storage_max_connections=5,
                          log_segment_size=self.log_segment_size)
    self.s3_bucket_name = settings.cloud_storage_bucket

    self.start_redpanda(3,
                        extra_rp_conf=self._extra_rp_conf,
                        si_settings=settings)

    topic_spec = TopicSpec(partition_count=partition_count,
                           replication_factor=3)
    DefaultClient(self.redpanda).create_topic(topic_spec)
    self.topic = topic_spec.name

    # Turn on remote write first, then remote read, for the new topic.
    rpk = RpkTool(self.redpanda)
    for prop in ('redpanda.remote.write', 'redpanda.remote.read'):
        rpk.alter_topic_config(topic_spec.name, prop, 'true')

    self.start_producer(1, throughput=100)
    self.start_consumer(1)
    self.await_startup()

    def no_under_replicated_partitions():
        # True only when every "under_replicated" sample is <= 0.
        samples = self.redpanda.metrics_sample("under_replicated").samples
        return not any(sample.value > 0 for sample in samples)

    # restart all the nodes and wait for recovery
    for _ in range(10):
        for node in self.redpanda.nodes:
            self.redpanda.signal_redpanda(node)
            self.redpanda.start_node(node)
        wait_until(no_under_replicated_partitions, 30, 2)

    self.run_validation(enable_idempotence=False,
                        producer_timeout_sec=60,
                        consumer_timeout_sec=180)
def test_overrides_remove(self):
    """
    Check that deleting topic-level shadow indexing overrides restores
    the values the topic reported initially.

    Sequence: verify both properties are "true", set both to "false",
    verify, delete both overrides, verify both are "true" again.
    """
    topic = self.topics[0].name
    rpk = RpkTool(self.redpanda)
    si_props = ("redpanda.remote.read", "redpanda.remote.write")

    original_output = rpk.describe_topic_configs(topic)
    self.logger.info(f"original_output={original_output}")
    for prop in si_props:
        assert original_output[prop][0] == "true"

    # disable shadow indexing for topic
    for prop in si_props:
        rpk.alter_topic_config(topic, prop, "false")

    altered_output = rpk.describe_topic_configs(topic)
    self.logger.info(f"altered_output={altered_output}")
    for prop in si_props:
        assert altered_output[prop][0] == "false"

    # delete topic configs (value from configuration should be used)
    for prop in si_props:
        rpk.delete_topic_config(topic, prop)

    altered_output = rpk.describe_topic_configs(topic)
    self.logger.info(f"altered_output={altered_output}")
    for prop in si_props:
        assert altered_output[prop][0] == "true"
def setUp(self):
    """
    Size the first topic for the environment, run the parent setup, and
    then apply remote write/read and retention at topic level.
    """
    # Dedicated nodes refers to non-container nodes such as EC2 instances
    if self.redpanda.dedicated_nodes:
        partitions = 100
    else:
        partitions = 10
    self.topics[0].partition_count = partitions

    # Topic creation happens here
    super().setUp()

    # Remote write/read and retention set at topic level
    rpk = RpkTool(self.redpanda)
    for prop in ('redpanda.remote.write', 'redpanda.remote.read'):
        rpk.alter_topic_config(self.topic, prop, 'true')
    rpk.alter_topic_config(self.topic, 'retention.bytes',
                           str(self.segment_size))
def test_querying_remote_partitions(self):
    """
    Record the high watermark seen for each leader epoch across several
    full-cluster restarts, shrink retention so local segments are
    removed, and then check that the offset-for-leader-epoch query still
    returns the recorded end offset for every epoch.
    """
    topic = TopicSpec(redpanda_remote_read=True,
                      redpanda_remote_write=True)
    epoch_offsets = {}
    rpk = RpkTool(self.redpanda)

    self.client().create_topic(topic)
    rpk.alter_topic_config(topic.name, "redpanda.remote.read", 'true')
    rpk.alter_topic_config(topic.name, "redpanda.remote.write", 'true')

    def topic_is_described():
        return len(list(rpk.describe_topic(topic.name))) > 0

    def wait_for_topic():
        wait_until(topic_is_described, 30, backoff_sec=2)

    # restart whole cluster 6 times to trigger term rolls
    for round_no in range(6):
        wait_for_topic()
        produce_until_segments(
            redpanda=self.redpanda,
            topic=topic.name,
            partition_idx=0,
            count=2 * round_no,
        )
        first_partition = list(rpk.describe_topic(topic.name))[0]
        epoch_offsets[
            first_partition.leader_epoch] = first_partition.high_watermark
        self.redpanda.restart_nodes(self.redpanda.nodes)

    self.logger.info(f"ledear epoch high watermarks: {epoch_offsets}")

    wait_for_topic()

    # Reduce retention and wait for the segment count to drop.
    rpk.alter_topic_config(topic.name, TopicSpec.PROPERTY_RETENTION_BYTES,
                           OffsetForLeaderEpochArchivalTest.segment_size)
    wait_for_segments_removal(redpanda=self.redpanda,
                              topic=topic.name,
                              partition_idx=0,
                              count=7)

    kcl = KCL(self.redpanda)
    for epoch, offset in epoch_offsets.items():
        self.logger.info(f"querying partition epoch {epoch} end offsets")
        response = kcl.offset_for_leader_epoch(topics=topic.name,
                                               leader_epoch=epoch)
        epoch_end_offset = response[0].epoch_end_offset
        self.logger.info(
            f"epoch {epoch} end_offset: {epoch_end_offset}, expected offset: {offset}"
        )
        assert epoch_end_offset == offset
def _workload(self, segment_size):
    """
    Run the produce/consume workload against ``self.topic``.

    Enables remote write/read on the topic, sets ``retention.bytes`` to
    ``segment_size``, starts the producer, checks that objects appear in
    shadow indexing storage, runs all consumers and finally asserts on
    producer and consumer counters.

    :param segment_size: value applied (as a string) to ``retention.bytes``
    """
    rpk = RpkTool(self.redpanda)
    for prop in ('redpanda.remote.write', 'redpanda.remote.read'):
        rpk.alter_topic_config(self.topic, prop, 'true')
    rpk.alter_topic_config(self.topic, 'retention.bytes', str(segment_size))

    producer = self._producer
    producer.start(clean=False)

    def acked_more_than(threshold):
        # Build a predicate on the producer's acked counter.
        return lambda: self._producer.produce_status.acked > threshold

    # Don't start consumers until the producer has written out its first
    # checkpoint with valid ranges.
    wait_until(acked_more_than(0), timeout_sec=30, backoff_sec=5.0)

    # Once we've written a lot of data, check that some of it showed up in S3
    wait_until(acked_more_than(10000), timeout_sec=300, backoff_sec=5)
    si_objects = list(self.redpanda.get_objects_from_si())
    assert len(si_objects) > 0
    for obj in si_objects:
        self.logger.info(f"S3 object: {obj.Key}, {obj.ContentLength}")

    # Lower bound on what the consumers must be able to read back.
    wrote_at_least = producer.produce_status.acked

    for consumer in self._consumers:
        consumer.start(clean=False)

    # Wait until we have written all the data we expected to write
    producer.wait()
    assert producer.produce_status.acked >= self.PRODUCE_COUNT

    # Wait for last iteration of consumers to finish: if they are currently
    # mid-run, they'll run to completion.
    for consumer in self._consumers:
        consumer.shutdown()
    for consumer in self._consumers:
        consumer.wait()

    expected_random_reads = self.RANDOM_READ_COUNT * self.RANDOM_READ_PARALLEL
    assert self._seq_consumer.consumer_status.valid_reads >= wrote_at_least
    assert self._rand_consumer.consumer_status.total_reads == expected_random_reads
    assert self._cg_consumer.consumer_status.valid_reads >= wrote_at_least
class ArchivalTest(RedpandaTest):
    """
    Tests for uploading log segments to S3-compatible storage.

    Each test produces data to a single-partition topic with
    ``redpanda.remote.write`` enabled and then compares the bucket
    contents (segments and partition manifest) with the log segments
    found on the redpanda nodes.
    """
    log_segment_size = 1048576  # 1MB
    log_compaction_interval_ms = 10000

    s3_host_name = "minio-s3"
    s3_access_key = "panda-user"
    s3_secret_key = "panda-secret"
    s3_region = "panda-region"
    s3_topic_name = "panda-topic"
    topics = (TopicSpec(name='panda-topic',
                        partition_count=1,
                        replication_factor=3), )

    def __init__(self, test_context):
        """Build the cloud storage configuration and helper clients."""
        self.s3_bucket_name = f"panda-bucket-{uuid.uuid1()}"
        self._extra_rp_conf = dict(
            cloud_storage_enabled=True,
            cloud_storage_access_key=ArchivalTest.s3_access_key,
            cloud_storage_secret_key=ArchivalTest.s3_secret_key,
            cloud_storage_region=ArchivalTest.s3_region,
            cloud_storage_bucket=self.s3_bucket_name,
            cloud_storage_disable_tls=True,
            cloud_storage_api_endpoint=ArchivalTest.s3_host_name,
            cloud_storage_api_endpoint_port=9000,
            cloud_storage_reconciliation_interval_ms=500,
            cloud_storage_max_connections=5,
            log_compaction_interval_ms=self.log_compaction_interval_ms,
            log_segment_size=self.log_segment_size,
        )
        # The time-boxed upload test needs a segment size large enough
        # that only the upload interval can trigger uploads.
        if test_context.function_name == "test_timeboxed_uploads":
            self._extra_rp_conf.update(
                log_segment_size=1024 * 1024 * 1024,
                cloud_storage_segment_max_upload_interval_sec=1)

        super().__init__(test_context=test_context,
                         extra_rp_conf=self._extra_rp_conf)

        self.kafka_tools = KafkaCliTools(self.redpanda)
        self.rpk = RpkTool(self.redpanda)
        self.s3_client = S3Client(
            region=ArchivalTest.s3_region,
            access_key=ArchivalTest.s3_access_key,
            secret_key=ArchivalTest.s3_secret_key,
            endpoint=f'http://{ArchivalTest.s3_host_name}:9000',
            logger=self.logger)

    def setUp(self):
        """Prepare an empty bucket, create topics and enable archival."""
        self.s3_client.empty_bucket(self.s3_bucket_name)
        self.s3_client.create_bucket(self.s3_bucket_name)
        # Deletes in S3 are eventually consistent so we might still
        # see previously removed objects for a while.
        validate(self._check_bucket_is_emtpy, self.logger, 300)
        super().setUp()  # topic is created here
        # enable archival for topic
        for topic in self.topics:
            self.rpk.alter_topic_config(topic.name, 'redpanda.remote.write',
                                        'true')

    def tearDown(self):
        """Empty the bucket before running the parent teardown."""
        self.s3_client.empty_bucket(self.s3_bucket_name)
        super().tearDown()

    @cluster(num_nodes=3)
    def test_write(self):
        """Simple smoke test, write data to redpanda and check if the
        data hit the S3 storage bucket"""
        self.kafka_tools.produce(self.topic, 10000, 1024)
        validate(self._quick_verify, self.logger, 90)

    @cluster(num_nodes=3)
    def test_isolate(self):
        """Verify that our isolate/rejoin facilities actually work"""
        with firewall_blocked(self.redpanda.nodes,
                              self._get_s3_endpoint_ip()):
            self.kafka_tools.produce(self.topic, 10000, 1024)
            time.sleep(10)  # can't busy wait here

            # Topic manifest can be present in the bucket because topic is created before
            # firewall is blocked. No segments or partition manifest should be present.
            topic_manifest_id = "d0000000/meta/kafka/panda-topic/topic_manifest.json"
            objects = self.s3_client.list_objects(self.s3_bucket_name)
            keys = [x.Key for x in objects]

            assert len(keys) < 2, \
                f"Bucket should be empty or contain only {topic_manifest_id}, but contains {keys}"

            if len(keys) == 1:
                assert topic_manifest_id == keys[0], \
                    f"Bucket should be empty or contain only {topic_manifest_id}, but contains {keys[0]}"

    @cluster(num_nodes=3)
    def test_reconnect(self):
        """Disconnect redpanda from S3, write data, connect redpanda to S3
        and check that the data is uploaded"""
        with firewall_blocked(self.redpanda.nodes,
                              self._get_s3_endpoint_ip()):
            self.kafka_tools.produce(self.topic, 10000, 1024)
            time.sleep(10)  # sleep is needed because we need to make sure that
            # reconciliation loop kicked in and started uploading
            # data, otherwise we can rejoin before archival storage
            # will even try to upload new segments
        validate(self._quick_verify, self.logger, 90)

    @cluster(num_nodes=3)
    def test_one_node_reconnect(self):
        """Disconnect one redpanda node from S3, write data, connect redpanda to S3
        and check that the data is uploaded"""
        self.kafka_tools.produce(self.topic, 1000, 1024)
        leaders = list(self._get_partition_leaders().values())
        with firewall_blocked(leaders[0:1], self._get_s3_endpoint_ip()):
            self.kafka_tools.produce(self.topic, 9000, 1024)
            time.sleep(10)  # sleep is needed because we need to make sure that
            # reconciliation loop kicked in and started uploading
            # data, otherwise we can rejoin before archival storage
            # will even try to upload new segments
        validate(self._quick_verify, self.logger, 90)

    @cluster(num_nodes=3)
    def test_connection_drop(self):
        """Disconnect redpanda from S3 during the active upload, restore connection
        and check that everything is uploaded"""
        self.kafka_tools.produce(self.topic, 10000, 1024)
        with firewall_blocked(self.redpanda.nodes,
                              self._get_s3_endpoint_ip()):
            time.sleep(10)  # sleep is needed because we need to make sure that
            # reconciliation loop kicked in and started uploading
            # data, otherwise we can rejoin before archival storage
            # will even try to upload new segments
        validate(self._quick_verify, self.logger, 90)

    @cluster(num_nodes=3)
    def test_connection_flicker(self):
        """Disconnect redpanda from S3 during the active upload for short period of time
        during upload and check that everything is uploaded"""
        # Alternates each iteration; when True the batch is produced while
        # the firewall rule is active.
        con_enabled = True
        for _ in range(0, 20):
            # upload data in batches
            if con_enabled:
                with firewall_blocked(self.redpanda.nodes,
                                      self._get_s3_endpoint_ip()):
                    self.kafka_tools.produce(self.topic, 500, 1024)
            else:
                self.kafka_tools.produce(self.topic, 500, 1024)
            con_enabled = not con_enabled
            time.sleep(1)
        time.sleep(10)
        validate(self._quick_verify, self.logger, 90)

    @cluster(num_nodes=3)
    def test_single_partition_leadership_transfer(self):
        """Start uploading data, restart leader node of the partition 0 to trigger the
        leadership transfer, continue upload, verify S3 bucket content"""
        self.kafka_tools.produce(self.topic, 5000, 1024)
        time.sleep(5)
        leaders = self._get_partition_leaders()
        node = leaders[0]
        self.redpanda.stop_node(node)
        time.sleep(1)
        self.redpanda.start_node(node)
        time.sleep(5)
        self.kafka_tools.produce(self.topic, 5000, 1024)
        validate(self._cross_node_verify, self.logger, 90)

    @cluster(num_nodes=3)
    def test_all_partitions_leadership_transfer(self):
        """Start uploading data, restart leader nodes of all partitions to trigger the
        leadership transfer, continue upload, verify S3 bucket content"""
        self.kafka_tools.produce(self.topic, 5000, 1024)
        time.sleep(5)
        leaders = self._get_partition_leaders()
        for ip, node in leaders.items():
            self.logger.debug(f"going to restart node {ip}")
            self.redpanda.stop_node(node)
            time.sleep(1)
            self.redpanda.start_node(node)
        time.sleep(5)
        self.kafka_tools.produce(self.topic, 5000, 1024)
        validate(self._cross_node_verify, self.logger, 90)

    @cluster(num_nodes=3)
    def test_timeboxed_uploads(self):
        """This test checks segment upload time limit. The feature is enabled in the
        configuration. The configuration defines maximum time interval between uploads.
        If the option is set then redpanda will start uploading a segment partially if
        configured amount of time passed since previous upload and the segment has some
        new data.
        The test sets the timeout value to 1s. Then it uploads data in batches with delays
        between the batches. The segment size is set to 1GiB. We upload 10MiB total. So
        normally, there won't be any data uploaded to Minio. But since the time limit for
        a segment is set to 1s we will see a bunch of segments in the bucket. The offsets
        of the segments won't align with the segment in the redpanda data directory. But
        their respective offset ranges should align and the sizes should make sense.
        """

        # The offsets of the segments in the Minio bucket won't necessarily
        # correlate with the write bursts here. The upload depends on the
        # timeout but also on raft and current high_watermark. So we can
        # expect that the bucket won't have 9 segments with 1000 offsets.
        # The actual segments will be larger.
        for _ in range(0, 10):
            self.kafka_tools.produce(self.topic, 1000, 1024)
            time.sleep(1)
        time.sleep(5)

        def check_upload():
            # check that the upload happened
            ntps = set()
            sizes = {}

            for node in self.redpanda.nodes:
                checksums = self._get_redpanda_log_segment_checksums(node)
                self.logger.info(
                    f"Node: {node.account.hostname} checksums: {checksums}")
                lst = [
                    _parse_normalized_segment_path(path, md5, size)
                    for path, (md5, size) in checksums.items()
                ]
                lst = sorted(lst, key=lambda x: x.base_offset)
                segments = defaultdict(int)
                sz = defaultdict(int)
                for it in lst:
                    ntps.add(it.ntp)
                    sz[it.ntp] += it.size
                    segments[it.ntp] += 1
                for ntp, s in segments.items():
                    assert s != 0, f"expected to have at least one segment per partition, got {s}"
                # Keep the total local size from the first node that
                # reports a given partition.
                for ntp, s in sz.items():
                    if ntp not in sizes:
                        sizes[ntp] = s

            # Download manifest for partitions
            for ntp in ntps:
                manifest = self._download_partition_manifest(ntp)
                self.logger.info(f"downloaded manifest {manifest}")
                segments = sorted(manifest['segments'].values(),
                                  key=lambda s: s['base_offset'])
                self.logger.info(f"sorted segments {segments}")

                prev_committed_offset = -1
                size = 0
                for segment in segments:
                    self.logger.info(
                        f"checking {segment} prev: {prev_committed_offset}")
                    base_offset = segment['base_offset']
                    # Both halves must be f-strings; previously the second
                    # half lacked the prefix and printed its placeholders
                    # literally.
                    assert prev_committed_offset + 1 == base_offset, (
                        f"inconsistent segments, "
                        f"expected base_offset: {prev_committed_offset + 1}, actual: {base_offset}"
                    )
                    prev_committed_offset = segment['committed_offset']
                    size += segment['size_bytes']
                assert sizes[ntp] >= size
                assert size > 0

        validate(check_upload, self.logger, 90)

    @cluster(num_nodes=3)
    def test_retention_archival_coordination(self):
        """
        Test that only archived segments can be evicted and that eviction
        restarts once the segments have been archived.
        """
        self.kafka_tools.alter_topic_config(
            self.topic,
            {
                TopicSpec.PROPERTY_RETENTION_BYTES: 5 * self.log_segment_size,
            },
        )

        with firewall_blocked(self.redpanda.nodes,
                              self._get_s3_endpoint_ip()):
            produce_until_segments(redpanda=self.redpanda,
                                   topic=self.topic,
                                   partition_idx=0,
                                   count=10)

            # Sleep some time sufficient for log eviction under normal conditions
            # and check that no segment has been evicted (because we can't upload
            # segments to the cloud storage).
            time.sleep(3 * self.log_compaction_interval_ms / 1000.0)
            counts = list(
                segments_count(self.redpanda, self.topic, partition_idx=0))
            self.logger.info(f"node segment counts: {counts}")
            assert len(counts) == len(self.redpanda.nodes)
            assert all(c >= 10 for c in counts)

        # Check that eviction restarts after we restored the connection to cloud
        # storage.
        wait_for_segments_removal(redpanda=self.redpanda,
                                  topic=self.topic,
                                  partition_idx=0,
                                  count=6)

    def _check_bucket_is_emtpy(self):
        """Assert that ``_list_objects`` reports no objects in the bucket."""
        allobj = self._list_objects()
        for obj in allobj:
            self.logger.debug(
                f"found object {obj} in bucket {self.s3_bucket_name}")
        assert len(allobj) == 0

    def _get_partition_leaders(self):
        """Return a dict mapping partition id of the test topic to the
        redpanda node that kcat metadata reports as its leader."""
        kcat = KafkaCat(self.redpanda)
        m = kcat.metadata()
        self.logger.info(f"kcat.metadata() == {m}")

        # Map broker id -> node by matching the host part of the broker
        # name against node hostnames.
        brokers = {}
        for b in m['brokers']:
            broker_id = b['id']
            ip = b['name']
            ip = ip[:ip.index(':')]
            for n in self.redpanda.nodes:
                n_ip = n.account.hostname
                self.logger.debug(f"matching {n_ip} over {ip}")
                if n_ip == ip:
                    brokers[broker_id] = n
                    break
        self.logger.debug(f"found brokers {brokers}")
        assert len(brokers) == 3

        leaders = {}
        for topic in m['topics']:
            if topic['topic'] == ArchivalTest.s3_topic_name:
                for part in topic['partitions']:
                    leader_id = part['leader']
                    partition_id = part['partition']
                    leaders[partition_id] = brokers[leader_id]
        return leaders

    def _download_partition_manifest(self, ntp):
        """Find and download individual partition manifest"""
        expected = f"{ntp.ns}/{ntp.topic}/{ntp.partition}_{ntp.revision}/manifest.json"
        manifest_key = None
        objects = []
        for loc in self._list_objects():
            objects.append(loc)
            if expected in loc:
                manifest_key = loc
                break
        if manifest_key is None:
            objlist = "\n".join(objects)
            self.logger.debug(
                f"expected path {expected} is not found in the bucket, bucket content: \n{objlist}"
            )
        assert manifest_key is not None
        manifest = self.s3_client.get_object_data(self.s3_bucket_name,
                                                  manifest_key)
        self.logger.info(f"manifest found: {manifest}")
        return json.loads(manifest)

    def _verify_manifest(self, ntp, manifest, remote):
        """Check that all segments that present in manifest are available
        in remote storage"""
        for sname in manifest['segments']:
            spath = f"{ntp.ns}/{ntp.topic}/{ntp.partition}_{ntp.revision}/{sname}"
            self.logger.info(f"validating manifest path {spath}")
            assert spath in remote

        # The offset ranges, ordered by base offset, must be contiguous
        # starting from offset 0.
        ranges = sorted(
            ((int(m['base_offset']), int(m['committed_offset']))
             for m in manifest['segments'].values()),
            key=lambda x: x[0])
        last_offset = -1
        num_gaps = 0
        for base, committed in ranges:
            if last_offset + 1 != base:
                self.logger.debug(
                    f"gap between {last_offset} and {base} detected")
                num_gaps += 1
            last_offset = committed
        assert num_gaps == 0

    def _cross_node_verify(self):
        """Verify data on all nodes taking into account possible alignment issues
        caused by leadership transitions.
        The verification algorithm is following:
        - Download and verify partition manifest;
        - Partition manifest has all segments and metadata like committed offset
          and base offset. We can also retrieve MD5 hash of every segment;
        - Load segment metadata for every redpanda node.
        - Scan every node's metadata and match segments with manifest, on success
          remove matched segment from the partition manifest.
        The goal #1 is to remove all segments from the manifest. The goal #2 is to
        find the last segment that's supposed to be uploaded from the leader node,
        it's base offset should be equal to manifest's last offset + 1.
        The segments match if:
        - The base offset and md5 hashes are the same;
        - The committed offset of both segments are the same, md5 hashes are different,
          and base offset of the segment from manifest is larger than base offset of the
          segment from redpanda node. In this case we should also compare the data
          directly by scanning both segments.
        """
        nodes = {}
        ntps = set()

        for node in self.redpanda.nodes:
            checksums = self._get_redpanda_log_segment_checksums(node)
            self.logger.info(
                f"Node: {node.account.hostname} checksums: {checksums}")
            lst = [
                _parse_normalized_segment_path(path, md5, size)
                for path, (md5, size) in checksums.items()
            ]
            lst = sorted(lst, key=lambda x: x.base_offset)
            nodes[node.account.hostname] = lst
            for it in lst:
                ntps.add(it.ntp)

        # Download metadata from S3
        remote = self._get_redpanda_s3_checksums()

        # Download manifest for partitions
        manifests = {}
        for ntp in ntps:
            manifest = self._download_partition_manifest(ntp)
            manifests[ntp] = manifest
            self._verify_manifest(ntp, manifest, remote)

        for ntp in ntps:
            self.logger.debug(f"verifying {ntp}")
            manifest = manifests[ntp]
            manifest_segments = [
                _parse_manifest_segment(manifest, sname, meta, remote,
                                        self.logger)
                for sname, meta in manifest['segments'].items()
            ]
            manifest_segments = sorted(manifest_segments,
                                       key=lambda x: x.base_offset)

            for node_key, node_segments in nodes.items():
                self.logger.debug(f"checking {ntp} on {node_key}")
                # Segments of this partition on this node; the same for
                # every manifest segment, so computed once per node.
                segments = sorted(
                    [s for s in node_segments if s.ntp == ntp],
                    key=lambda x: x.base_offset)

                for mix, msegm in enumerate(manifest_segments):
                    if msegm is None:
                        # Already matched on a previous node.
                        continue
                    self.logger.debug(
                        f"checking manifest segment {msegm} over {node_key} segments {segments}"
                    )
                    found = False
                    for ix, nsegm in enumerate(segments):
                        # Committed offset of a local segment is derived
                        # from the next segment's base offset; it is
                        # unknown (-1) for the last one.
                        nsegm_co = -1 if (ix + 1) == len(segments) else (
                            segments[ix + 1].base_offset - 1)
                        self.logger.debug(
                            f"comparing {msegm.base_offset}:{msegm.committed_offset}:{msegm.md5} to "
                            f"{nsegm.base_offset}:{nsegm_co}:{nsegm.md5}")
                        if msegm.base_offset == nsegm.base_offset and msegm.md5 == nsegm.md5:
                            # Success
                            self.logger.info(
                                f"found match for segment {msegm.ntp} {msegm.base_offset} on {node_key}"
                            )
                            manifest_segments[mix] = None
                            found = True
                            break
                        if msegm.committed_offset == nsegm_co and msegm.base_offset > nsegm.base_offset:
                            # Found segment with truncated head (due to leadership transition)
                            actual_hash = self._get_partial_checksum(
                                node_key, nsegm.normalized_path, msegm.size)
                            self.logger.info(
                                f"partial hash {actual_hash} retreived, s3 hash {msegm.md5}"
                            )
                            if actual_hash == msegm.md5:
                                manifest_segments[mix] = None
                                self.logger.info(
                                    f"partial match for segment {msegm.ntp} {msegm.base_offset}-"
                                    + f"{msegm.committed_offset} on {node_key}")
                                found = True
                                break
                    if not found:
                        self.logger.debug(
                            f"failed to match {msegm.base_offset}:{msegm.committed_offset}"
                        )
                    else:
                        self.logger.debug(
                            f"matched {msegm.base_offset}:{msegm.committed_offset} successfully"
                        )

            # All segments should be matched and set to None
            if any(manifest_segments):
                self.logger.debug(
                    f"manifest segments that fail to validate: {manifest_segments}"
                )
            assert not any(manifest_segments)

            # Verify goal #2, the last segment on a leader node is manifest.last_offset + 1
            ntp_offsets = []
            for node_key, node_segments in nodes.items():
                offsets = [
                    segm.base_offset for segm in node_segments
                    if segm.ntp == ntp
                ]
                if offsets:
                    max_offset = max(offsets)
                    ntp_offsets.append(max_offset)
                    self.logger.debug(
                        f"NTP {ntp} has the largest offset {max_offset} on node {node_key}"
                    )
                else:
                    self.logger.debug(
                        f"NTP {ntp} has no offsets on node {node_key}")

            last_offset = int(manifest['last_offset'])
            self.logger.debug(
                f"last offset: {last_offset}, ntp offsets: {ntp_offsets}")
            assert (last_offset + 1) in ntp_offsets

    def _list_objects(self):
        """Emulate ListObjects call by fetching the topic manifests and
        iterating through its content"""
        # NOTE(review): the partition manifest path below hard-codes
        # revision "0_9", and the object data is indexed without being
        # parsed (another method in this class passes the same kind of
        # data to json.loads first). Presumably this branch often fails
        # and the listing fallback is what actually runs - confirm.
        try:
            topic_manifest_id = "d0000000/meta/kafka/panda-topic/topic_manifest.json"
            partition_manifest_id = "d0000000/meta/kafka/panda-topic/0_9/manifest.json"
            manifest = self.s3_client.get_object_data(self.s3_bucket_name,
                                                      partition_manifest_id)
            results = [topic_manifest_id, partition_manifest_id]
            results.extend(manifest['segments'].keys())
            self.logger.debug(f"ListObjects(source: manifest): {results}")
        except Exception:
            # Deliberate best-effort fallback to a real listing. Narrowed
            # from a bare ``except:`` so KeyboardInterrupt/SystemExit are
            # no longer swallowed.
            results = [
                loc.Key
                for loc in self.s3_client.list_objects(self.s3_bucket_name)
            ]
            self.logger.debug(f"ListObjects: {results}")
        return results

    def _quick_verify(self):
        """Verification algorithm that works only if no leadership
        transfer happened during the run. It works by looking up all
        segments from the remote storage in local redpanda storages.
        It's done by using md5 hashes of the nodes.
        """
        local = {}
        for node in self.redpanda.nodes:
            checksums = self._get_redpanda_log_segment_checksums(node)
            self.logger.info(
                f"Node: {node.account.hostname} checksums: {checksums}")
            for k, v in checksums.items():
                local.setdefault(k, set()).add(v)
        remote = self._get_redpanda_s3_checksums()
        self.logger.info(f"S3 checksums: {remote}")
        self.logger.info(f"Local checksums: {local}")
        assert len(local) != 0
        assert len(remote) != 0

        md5fails = 0
        lookup_fails = 0
        for path, csum in remote.items():
            self.logger.info(f"checking remote path: {path} csum: {csum}")
            if path not in local:
                self.logger.debug(
                    f"remote path {path} can't be found in any of the local storages"
                )
                lookup_fails += 1
            else:
                if len(local[path]) != 1:
                    self.logger.info(
                        f"remote segment {path} have more than one variant {local[path]}"
                    )
                if csum not in local[path]:
                    self.logger.debug(
                        f"remote md5 {csum} doesn't match any local {local[path]}"
                    )
                    md5fails += 1
        if md5fails != 0:
            self.logger.debug(
                f"Validation failed, {md5fails} remote segments doesn't match")
        if lookup_fails != 0:
            self.logger.debug(
                f"Validation failed, remote {lookup_fails} remote locations doesn't match local"
            )
        assert md5fails == 0 and lookup_fails == 0

        # Validate partitions
        # for every partition the segment with largest base offset shouldn't be
        # available in remote storage
        local_partitions = {}
        remote_partitions = {}
        for path, items in local.items():
            meta = _parse_normalized_segment_path(path, '', 0)
            local_partitions.setdefault(meta.ntp, []).append((meta, items))
        for path, items in remote.items():
            meta = _parse_normalized_segment_path(path, '', 0)
            remote_partitions.setdefault(meta.ntp, []).append((meta, items))
        self.logger.info(
            f"generated local partitions {local_partitions.keys()}")
        self.logger.info(
            f"generated remote partitions {remote_partitions.keys()}")

        # Download manifest for partitions
        manifests = {}
        for ntp in local_partitions:
            manifest = self._download_partition_manifest(ntp)
            manifests[ntp] = manifest
            self._verify_manifest(ntp, manifest, remote)

        # Check that all local partition are archived
        assert len(local_partitions) == 1
        assert len(remote_partitions) == 1
        missing_partitions = 0
        for key in local_partitions:
            if key not in remote_partitions:
                self.logger.debug(f"partition {key} not found in remote set")
                missing_partitions += 1
        assert missing_partitions == 0

    def _get_redpanda_log_segment_checksums(self, node):
        """Get MD5 checksums of log segments that match the topic. The paths are
        normalized (<namespace>/<topic>/<partition>_<rev>/...)."""
        checksums = self.redpanda.data_checksum(node)

        # Filter out all unwanted paths
        def included(path):
            controller_log_prefix = os.path.join(RedpandaService.DATA_DIR,
                                                 "redpanda")
            log_segment_extension = ".log"
            return not path.startswith(
                controller_log_prefix) and path.endswith(log_segment_extension)

        # Remove data dir from path
        def normalize_path(path):
            return os.path.relpath(path, RedpandaService.DATA_DIR)

        return {
            normalize_path(path): value
            for path, value in checksums.items() if included(path)
        }

    def _get_redpanda_s3_checksums(self):
        """Get MD5 checksums of log segments stored in S3 (minio). The paths are
        normalized (<namespace>/<topic>/<partition>_<rev>/...)."""
        def normalize(path):
            return path[9:]  # 8-character hash + /

        def included(path):
            manifest_extension = ".json"
            return not path.endswith(manifest_extension)

        return {
            normalize(it.Key): (it.ETag, it.ContentLength)
            for it in self.s3_client.list_objects(self.s3_bucket_name)
            if included(it.Key)
        }

    def _get_partial_checksum(self, hostname, normalized_path, tail_bytes):
        """Compute md5 checksum of the last 'tail_bytes' of the file located
        on a node."""
        node = None
        for n in self.redpanda.nodes:
            if n.account.hostname == hostname:
                node = n
        # Fail with a clear message instead of an AttributeError on None.
        assert node is not None, f"no redpanda node with hostname {hostname}"
        full_path = os.path.join(RedpandaService.DATA_DIR, normalized_path)
        cmd = f"tail -c {tail_bytes} {full_path} | md5sum"
        line = node.account.ssh_output(cmd)
        tokens = line.split()
        return tokens[0].decode()

    def _isolate(self, nodes, ips):
        """Isolate certain ips from the nodes using firewall rules"""
        cmd = []
        for ip in ips:
            cmd.append(f"iptables -A INPUT -s {ip} -j DROP")
            cmd.append(f"iptables -A OUTPUT -d {ip} -j DROP")
        cmd = " && ".join(cmd)
        for node in nodes:
            node.account.ssh_output(cmd, allow_fail=False)

    def _rejoin(self, nodes, ips):
        """Remove firewall rules that isolate ips from the nodes"""
        cmd = []
        for ip in ips:
            cmd.append(f"iptables -D INPUT -s {ip} -j DROP")
            cmd.append(f"iptables -D OUTPUT -d {ip} -j DROP")
        cmd = " && ".join(cmd)
        for node in nodes:
            node.account.ssh_output(cmd, allow_fail=False)

    def _host_name_to_ip_address(self, hostname):
        """Resolve ``hostname`` by running ``getent hosts`` on the first
        redpanda node and returning the first field of its output."""
        ip_host = self.redpanda.nodes[0].account.ssh_output(
            f'getent hosts {hostname}')
        return ip_host.split()[0].decode()

    def _get_s3_endpoint_ip(self):
        """Return the resolved address of the S3 endpoint host."""
        return self._host_name_to_ip_address(ArchivalTest.s3_host_name)

    def _get_rp_cluster_ips(self, nhosts=4):
        """Resolve hosts named ``rp_n<i>_1`` for i in 1..nhosts."""
        lst = []
        for ix in range(1, nhosts + 1):
            h = f"rp_n{ix}_1"
            lst.append(self._host_name_to_ip_address(h))
        return lst
def test_overlapping_changes(self):
    """
    Check that while a movement is in flight, rules about
    overlapping operations are properly enforced.
    """
    self.start_redpanda(num_nodes=4)
    node_ids = {1, 2, 3, 4}

    # Create topic with enough data that inter-node movement
    # will take a while.
    name = "movetest"
    self.client().create_topic(
        TopicSpec(name=name, partition_count=1, replication_factor=3))

    # Wait for the partition to have a leader (`rpk produce` errors
    # out if it tries to write data before this)
    def partition_ready():
        leader = KafkaCat(self.redpanda).get_partition_leader(name, 0)[0]
        return leader is not None

    wait_until(partition_ready, timeout_sec=10, backoff_sec=0.5)

    # Write a substantial amount of data to the topic
    msg_size = 512 * 1024
    write_bytes = 512 * 1024 * 1024
    producer = RpkProducer(self._ctx,
                           self.redpanda,
                           name,
                           msg_size=msg_size,
                           msg_count=int(write_bytes / msg_size))
    started_at = time.time()
    producer.start()

    # This is an absurdly low expected throughput, but necessarily
    # so to run reliably on current test runners, which share an EBS
    # backend among many parallel tests.  10MB/s has been empirically
    # shown to be too high an expectation.
    expect_bps = 1 * 1024 * 1024
    producer.wait(timeout_sec=write_bytes / expect_bps)

    self.logger.info(
        f"Write complete {write_bytes} in {time.time() - started_at} seconds")

    # - Admin API redirects writes but not reads.  Because we want synchronous
    #   status after submitting operations, send all operations to the controller
    #   leader.  This is not necessary for operations to work, just to simplify
    #   this test by letting it see synchronous status updates.
    # - Because we will later verify that a 503 is sent in response to
    #   a move request to an in_progress topic, set retry_codes=[] to
    #   disable default retries on 503.
    admin = Admin(self.redpanda,
                  default_node=self.redpanda.controller(),
                  retry_codes=[])

    def move_in_progress():
        return admin.get_partitions(name, 0)['status'] == "in_progress"

    # Start an inter-node move, which should take some time
    # to complete because of recovery network traffic
    old_assignments = self._get_assignments(admin, name, 0)
    used_nodes = {a['node_id'] for a in old_assignments}
    new_node = list(node_ids - used_nodes)[0]
    self.logger.info(f"old assignments: {old_assignments}")
    # Drop the first replica and add the node that had none.
    assignments = old_assignments[1:] + [{'node_id': new_node, 'core': 0}]
    self.logger.info(f"new assignments: {assignments}")
    r = admin.set_partition_replicas(name, 0, assignments)
    r.raise_for_status()
    assert move_in_progress()

    # Another move should fail
    assert move_in_progress()
    try:
        r = admin.set_partition_replicas(name, 0, old_assignments)
    except requests.exceptions.HTTPError as err:
        assert err.response.status_code == 503
    else:
        raise RuntimeError(f"Expected 503 but got {r.status_code}")

    # An update to partition properties should succeed
    # (issue https://github.com/vectorizedio/redpanda/issues/2300)
    rpk = RpkTool(self.redpanda)
    assert move_in_progress()
    rpk.alter_topic_config(name, "retention.ms", "3600000")

    # A deletion should succeed
    assert name in rpk.list_topics()
    assert move_in_progress()
    rpk.delete_topic(name)
    assert name not in rpk.list_topics()
def setUp(self):
    """
    Run the parent setup, then enable remote write and remote read on
    every topic in ``self.topics``.
    """
    rpk = RpkTool(self.redpanda)
    super(ShadowIndexingTxTest, self).setUp()

    remote_props = ('redpanda.remote.write', 'redpanda.remote.read')
    for spec in self.topics:
        for prop in remote_props:
            rpk.alter_topic_config(spec.name, prop, 'true')