Exemplo n.º 1
0
    def starter_run_impl(self):
        """Spawn all starters, wait for them to come up, then prepare them.

        Steps, in order:

        1. Call ``run_starter()`` on every entry of ``self.starter_instances``.
        2. Poll ``is_instance_up()`` once per second, working from the end of
           the list backwards, until every starter has reported itself up.
        3. Run ``detect_instances()`` and ``detect_instance_pids()`` on each.
        4. Set the passvoid ``"cluster"`` on every starter and remember it in
           ``self.passvoid``.
        """
        lh.subsection("instance setup")
        for starter in self.starter_instances:
            logging.info("Spawning instance")
            starter.run_starter()

        logging.info("waiting for the starters to become alive")
        # Work on a copy so that self.starter_instances stays untouched while
        # the starters that are already up get removed from the wait list.
        pending = list(self.starter_instances)
        while pending:
            candidate = pending[-1]
            logging.debug("waiting for mananger with logfile:" +
                          str(candidate.log_file))
            if candidate.is_instance_up():
                pending.pop()
            progress(".")
            time.sleep(1)

        logging.info("waiting for the cluster instances to become alive")
        for starter in self.starter_instances:
            starter.detect_instances()
            starter.detect_instance_pids()
            # NOTE(review): frontend registration is disabled here:
            # self.basecfg.add_frontend('http', self.basecfg.publicip, str(starter.get_frontend_port()))
        first_starter = self.starter_instances[0]
        logging.info("instances are ready - JWT: " +
                     first_starter.get_jwt_header())

        # Only the first starter receives True as the second argument
        # (presumably "actually write the password" - confirm in set_passvoid).
        for position, starter in enumerate(self.starter_instances):
            starter.set_passvoid("cluster", position == 0)
        self.passvoid = "cluster"
Exemplo n.º 2
0
    def jam_attempt_impl(self):
        """Kill the current leader and verify that a failover takes place.

        The sequence is:

        * terminate ``self.first_leader`` and poll the follower nodes once per
          second until one of them reports ``is_leader``; that node becomes
          ``self.new_leader`` and ``self.leader``;
        * if selenium is configured, reconnect it to the new leader and run
          ``check_old`` against it;
        * request the replication UI page of the new leader; any status other
          than 200 marks the run as failed (``self.success = False``);
        * respawn the old leader and wait until
          ``active_failover_detect_host_now_follower()`` is true;
        * request the old leader's frontend; any status other than 503 marks
          the run as failed;
        * prompt the user twice for manual UI validation along the way.
        """
        self.first_leader.terminate_instance()
        logging.info("waiting for new leader...")
        self.new_leader = None

        while self.new_leader is None:
            for candidate in self.follower_nodes:
                candidate.detect_leader()
                if not candidate.is_leader:
                    progress('.')
                    continue
                logging.info('have a new leader: %s', str(candidate.arguments))
                self.new_leader = candidate
                self.leader = candidate
                break
            time.sleep(1)

        if self.selenium:
            # Prefer the upgraded config when one is set.
            active_cfg = self.new_cfg or self.cfg
            self.selenium.connect_server(self.leader.get_frontends(), '_system',
                                         active_cfg)
            self.selenium.check_old(self.new_cfg or self.cfg, 1)
        print()

        logging.info(str(self.new_leader))
        leader_base = self.new_leader.get_frontend().get_local_url('')
        url = '{host}/_db/_system/_admin/aardvark/index.html#replication'.format(
            host=leader_base)
        credentials = HTTPBasicAuth('root', self.leader.passvoid)
        reply = requests.get(url, auth=credentials)
        logging.info(str(reply))
        if reply.status_code != 200:
            # The new leader must serve its web UI.
            logging.info(reply.text)
            self.success = False
        self.set_frontend_instances()

        prompt_user(self.basecfg,
                    'The leader failover has happened.\n'
                    'please revalidate the UI states on the new leader;'
                    ' you should see *one* follower.')

        self.first_leader.respawn_instance()
        self.first_leader.detect_instances()
        logging.info("waiting for old leader to show up as follower")
        while True:
            if self.first_leader.active_failover_detect_host_now_follower():
                break
            progress('.')
            time.sleep(1)
        print()

        url = self.first_leader.get_frontend().get_local_url('')
        credentials = HTTPBasicAuth('root', self.leader.passvoid)
        reply = requests.get(url, auth=credentials)
        logging.info(str(reply))
        logging.info(str(reply.text))

        # The respawned old leader is expected to answer with 503.
        if reply.status_code != 503:
            self.success = False

        prompt_user(self.basecfg,
                    'The old leader has been respawned as follower (%s),'
                    ' so there should be two followers again.'
                    % self.first_leader.get_frontend().get_public_url('root@'))

        if self.success:
            verdict = "Success"
        else:
            verdict = "Failed"
        logging.info("state of this test is: %s", verdict)
        if self.selenium:
            self.selenium.check_old(self.new_cfg or self.cfg, 2, 20)
Exemplo n.º 3
0
    def upload_status(self, backup_name: str, status_id: str, instance_count: int, timeout: int = 180) -> None:
        """Poll the status of a backup upload until all instances completed.

        Runs ``self.run_backup(["upload", "--status-id", status_id], ...)``
        once per second and counts the ``Status: <STATE>`` lines in its output.

        Args:
            backup_name: passed through to ``run_backup``.
            status_id: id of the upload operation to query.
            instance_count: number of ``COMPLETED`` status lines that must be
                seen in a single poll for the upload to count as finished.
            timeout: maximum number of polls (one per second) before giving up.

        Raises:
            Exception: if at least one instance reports ``FAILED``.
            TimeoutError: if ``timeout`` polls pass without
                ``instance_count`` instances reporting ``COMPLETED``.
        """
        args = [
            "upload",
            "--status-id",
            status_id,
        ]
        # Compiled once; the same pattern is applied to every output line.
        status_pattern = re.compile(r".*Status: (.*)")
        while True:
            out = self.run_backup(args, backup_name, True)
            progress(".")
            counts = {
                "ACK": 0,
                "STARTED": 0,
                "COMPLETED": 0,
                "FAILED": 0,
                "CANCELLED": 0,
            }
            for line in out.split("\n"):
                match = status_pattern.match(line)
                if not match:
                    continue
                # Strip so that a trailing "\r" or blank does not turn a known
                # status into an unknown one.
                which = match.group(1).strip()
                if which in counts:
                    counts[which] += 1
                else:
                    # A missing dict key raises KeyError (not AttributeError),
                    # so test membership explicitly and report the odd line
                    # instead of aborting the poll.
                    print("Line with unknown status [%s]: %s %s" % (which, line, str(counts)))

            # NOTE(review): the two messages below talk about restoring /
            # creating a backup although this method polls an upload; they are
            # kept verbatim in case something matches on them.
            if counts["COMPLETED"] == instance_count:
                print("all nodes have completed to restore the backup")
                return
            if counts["FAILED"] > 0:
                raise Exception("failed to create backup: " + str(out))
            print("have to retry. " + str(counts) + " - " + str(instance_count))
            timeout -= 1
            if timeout <= 0:
                raise TimeoutError("failed to find %d 'COMPLETED' status for upload status" % instance_count)
            time.sleep(1)