def test_capi_with_checkpointing(self):
    """Load documents over a paused replication, then read vb0's checkpoint.

    Steps, in order:
      1. Set up XDCR and pause all replications on the source cluster.
      2. Load documents into all source buckets from a generator, resume
         the replications and wait for replication to catch up.
      3. Find the source node whose IP matches the first entry of the
         'default' bucket's vBucket map (vb0).
      4. Fetch and log the most recent XDCR checkpoint record for the
         default -> default replication from that node.
      5. Run ``_verify_es_results``.

    Raises:
        XDCRCheckpointException: if no source node matches the vb0 IP, or
            if retrieving/logging the checkpoint record fails.
    """
    self.setup_xdcr()

    # Mutations are loaded while replication is paused, then released at once.
    self.src_cluster.pause_all_replications()
    doc_template = '{{"key":"value","mutated":0}}'
    gen = DocumentGenerator('es', doc_template, range(100),
                            start=0, end=self._num_items)
    self.src_cluster.load_all_buckets_from_generator(gen)
    self.src_cluster.resume_all_replications()
    self._wait_for_replication_to_catchup()
    self.sleep(120)

    # Locate the node that serves vb0; if several nodes share the IP the
    # last one listed wins.
    cluster_nodes = self.src_cluster.get_nodes()
    vb0_ip = VBucketAwareMemcached(self.rest, 'default').vBucketMap[0].split(':')[0]
    matching_nodes = [node for node in cluster_nodes if vb0_ip == node.ip]
    vb0_node = matching_nodes[-1] if matching_nodes else None
    if not vb0_node:
        raise XDCRCheckpointException("Error determining the node containing active vb0")

    vb0_conn = RestConnection(vb0_node)
    try:
        replication = vb0_conn.get_replication_for_buckets('default', 'default')
        replication_id = replication['id']
        checkpoint_record = vb0_conn.get_recent_xdcr_vb_ckpt(replication_id)
        self.log.info("Checkpoint record : {0}".format(checkpoint_record))
    except Exception as e:
        raise XDCRCheckpointException(
            "Error retrieving last checkpoint document - {0}".format(e))

    self._verify_es_results()
def validate_remote_failover_log(self, vb_uuid, high_seqno):
    """Check a checkpoint's target vb_uuid against the remote failover log.

    The failover log is read from ``self.dest_master`` and logged; only the
    uuid is compared (as integers).

    Args:
        vb_uuid: target vb_uuid taken from the checkpoint record.
        high_seqno: accepted for interface compatibility; not compared here.

    Raises:
        XDCRCheckpointException: if the remote uuid differs from ``vb_uuid``.
    """
    # TAP based validation
    failover_entry = self.get_failover_log(self.dest_master)
    remote_uuid, remote_highseq = failover_entry
    self.log.info("Remote failover log = [{0},{1}]".format(remote_uuid, remote_highseq))

    if int(remote_uuid) == int(vb_uuid):
        return

    mismatch = "vb_uuid in commitopaque is {0} while actual remote vb_uuid is {1}".format(
        vb_uuid, remote_uuid)
    raise XDCRCheckpointException(mismatch)
def get_and_validate_latest_checkpoint(self):
    """Fetch the newest checkpoint record for vb0 and sanity-check it.

    The record is read from the node returned by
    ``get_active_vb0_node(self.src_master)`` for the default -> default
    replication, logged, and appended to ``self.chkpt_records``.

    When the record's ``seqno`` is non-zero:
      * the target vb_uuid is validated against the remote failover log;
      * the record's ``failover_uuid`` must equal the local failover uuid
        read from ``self.src_master``, or be 0.
    A record with ``seqno`` 0 skips both checks.

    Returns:
        True (failures surface as exceptions/assertion errors).

    Raises:
        XDCRCheckpointException: if the checkpoint record cannot be
            retrieved, or the remote vb_uuid validation fails.
    """
    vb0_rest = RestConnection(self.get_active_vb0_node(self.src_master))
    replication = vb0_rest.get_replication_for_buckets('default', 'default')
    try:
        checkpoint_record = vb0_rest.get_recent_xdcr_vb_ckpt(replication['id'])
        self.log.info("Checkpoint record : {0}".format(checkpoint_record))
        self.chkpt_records.append(checkpoint_record)
    except Exception as e:
        raise XDCRCheckpointException(
            "Error retrieving last checkpoint document - {0}".format(e))

    failover_uuid = checkpoint_record["failover_uuid"]
    seqno = checkpoint_record["seqno"]
    self.log.info("Verifying commitopaque/remote failover log ...")

    if seqno == 0:
        self.log.info("Skipping checkpoint record checks for checkpoint-0")
        return True

    target_vb_uuid = checkpoint_record["target_vb_opaque"]["target_vb_uuid"]
    target_seqno = checkpoint_record["target_seqno"]
    self.validate_remote_failover_log(target_vb_uuid, target_seqno)

    self.log.info("Verifying local failover uuid ...")
    local_vb_uuid, _ = self.get_failover_log(self.src_master)
    recorded_uuid = int(failover_uuid)
    # A recorded failover_uuid of 0 is accepted as well as an exact match.
    uuid_ok = recorded_uuid == int(local_vb_uuid) or recorded_uuid == 0
    failure_msg = ("local failover_uuid is wrong in checkpoint record! "
                   "Expected: {0} seen: {1}").format(local_vb_uuid, failover_uuid)
    self.assertTrue(uuid_ok, failure_msg)
    self.log.info("Checkpoint record verified")
    return True
def get_and_validate_latest_checkpoint(self):
    """Fetch the newest 'default' checkpoint record and sanity-check it.

    The record is read through a REST connection to ``self.src_master``,
    logged, and appended to ``self.chkpt_records``.

    When the record's ``seqno`` is non-zero:
      * the remote failover log is validated, using ``target_vb_uuid`` and
        the whole ``commitopaque`` value when goxdcr is enabled, or the
        first two elements of ``commitopaque`` otherwise;
      * the record's ``failover_uuid`` must equal the local failover uuid
        read from ``self.src_master``.
    A record with ``seqno`` 0 skips both checks.

    Returns:
        True (failures surface as exceptions/assertion errors).

    Raises:
        XDCRCheckpointException: if the checkpoint record cannot be
            retrieved, or the remote vb_uuid validation fails.
    """
    src_rest = RestConnection(self.src_master)
    try:
        checkpoint_record = src_rest.get_recent_xdcr_vb_ckpt('default')
        self.log.info("Checkpoint record : {}".format(checkpoint_record))
        self.chkpt_records.append(checkpoint_record)
    except Exception as e:
        raise XDCRCheckpointException(
            "Error retrieving last checkpoint document - {}".format(e))

    commit_opaque = checkpoint_record["commitopaque"]
    failover_uuid = checkpoint_record["failover_uuid"]
    seqno = checkpoint_record["seqno"]
    self.log.info("Verifying commitopaque/remote failover log ...")

    if seqno == 0:
        self.log.info("Skipping checkpoint record checks for checkpoint-0")
        return True

    # The location of the remote vb_uuid depends on whether goxdcr is on.
    if rest_is_goxdcr := src_rest.is_goxdcr_enabled():
        remote_args = (checkpoint_record["target_vb_uuid"], commit_opaque)
    else:
        remote_args = (commit_opaque[0], commit_opaque[1])
    self.validate_remote_failover_log(*remote_args)

    self.log.info("Verifying local failover uuid ...")
    local_vb_uuid, _ = self.get_failover_log(self.src_master)
    uuids_match = int(local_vb_uuid) == int(failover_uuid)
    failure_msg = ("local failover_uuid is wrong in checkpoint record! "
                   "Expected: {0} seen: {1}").format(local_vb_uuid, failover_uuid)
    self.assertTrue(uuids_match, failure_msg)
    self.log.info("Checkpoint record verified")
    return True
def wait_for_checkpoint_to_happen(self, timeout=180, interval=10):
    """Block until the successful-checkpoint stat exceeds the saved baseline.

    Polls ``get_stat_successful_checkpoints()`` and returns as soon as the
    value is greater than ``self.stat_num_success_ckpts``. Between polls it
    calls ``self.sleep(interval)``.

    Args:
        timeout: overall time budget in seconds (default 180, i.e. 3 min).
            The budget is only checked before each poll, so the call may
            overrun it by up to one ``interval``.
        interval: seconds to sleep between polls (default 10).

    Raises:
        XDCRCheckpointException: if the stat did not increase before the
            time budget ran out.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        num_success_ckpts = self.get_stat_successful_checkpoints()
        if num_success_ckpts > self.stat_num_success_ckpts:
            return
        self.sleep(interval)
    # Reaching here means the loop ran out of time without seeing a new
    # checkpoint (the success path returns from inside the loop).
    raise XDCRCheckpointException("Timed-out waiting for checkpoint to happen")