Example #1
def restore_node(config,
                 temp_dir,
                 backup_name,
                 in_place,
                 keep_auth,
                 seeds,
                 verify,
                 keyspaces,
                 tables,
                 use_sstableloader=False):

    if in_place and keep_auth:
        logging.error(
            'Cannot keep system_auth when restoring in-place. It would be overwritten'
        )
        sys.exit(1)

    storage = Storage(config=config.storage)

    if not use_sstableloader:
        restore_node_locally(config, temp_dir, backup_name, in_place,
                             keep_auth, seeds, storage, keyspaces, tables)
    else:
        restore_node_sstableloader(config, temp_dir, backup_name, in_place,
                                   keep_auth, seeds, storage, keyspaces,
                                   tables)

    if verify:
        verify_restore([socket.getfqdn()], config)
def restore_node(config,
                 temp_dir,
                 backup_name,
                 in_place,
                 keep_auth,
                 seeds,
                 verify,
                 keyspaces,
                 tables,
                 use_sstableloader=False):

    if in_place and keep_auth:
        logging.error(
            'Cannot keep system_auth when restoring in-place. It would be overwritten'
        )
        sys.exit(1)

    storage = Storage(config=config.storage)

    if not use_sstableloader:
        restore_node_locally(config, temp_dir, backup_name, in_place,
                             keep_auth, seeds, storage, keyspaces, tables)
    else:
        restore_node_sstableloader(config, temp_dir, backup_name, in_place,
                                   keep_auth, seeds, storage, keyspaces,
                                   tables)

    if verify:
        hostname_resolver = HostnameResolver(
            medusa.config.evaluate_boolean(
                config.cassandra.resolve_ip_addresses))
        verify_restore([hostname_resolver.resolve_fqdn()], config)
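The second variant of restore_node replaces the hard-coded socket.getfqdn() call with a HostnameResolver built from the cassandra.resolve_ip_addresses setting, so how the local node is named for verification is controlled by configuration rather than fixed in code. A minimal sketch of that switch, using a hypothetical stand-in resolver (the class below is illustrative only and is not Medusa's HostnameResolver):

import socket


class ResolverSketch:
    """Hypothetical stand-in for a hostname resolver driven by a config flag."""

    def __init__(self, resolve_ip_addresses):
        # assumption for the sketch: a truthy flag means report the local IP instead of the FQDN
        self.resolve_ip_addresses = resolve_ip_addresses

    def resolve_fqdn(self):
        if self.resolve_ip_addresses:
            return socket.gethostbyname(socket.gethostname())
        return socket.getfqdn()


flag = 'False'  # config values often arrive as strings, hence the explicit parsing below
resolver = ResolverSketch(flag.strip().lower() in ('true', '1', 'yes'))
print(resolver.resolve_fqdn())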
    def _restore_data(self):
        # create workdir on each target host
        # Later: distribute a credential
        # construct command for each target host
        # invoke `nohup medusa-wrapper #{command}` on each target host
        # wait for exit on each
        logging.info('Starting cluster restore...')
        logging.info('Working directory for this execution: {}'.format(
            self.work_dir))
        for target, sources in self.host_map.items():
            logging.info(
                'About to restore on {} using {} as backup source'.format(
                    target, sources))

        logging.info(
            'This will delete all data on the target nodes and replace it with backup {}.'
            .format(self.cluster_backup.name))

        proceed = None
        while (proceed != 'Y' and proceed != 'n') and not self.bypass_checks:
            proceed = input('Are you sure you want to proceed? (Y/n)')

        if proceed == 'n':
            err_msg = 'Restore manually cancelled'
            logging.error(err_msg)
            raise Exception(err_msg)

        # work out which nodes are seeds in the target cluster
        target_seeds = [t for t, s in self.host_map.items() if s['seed']]
        logging.info("target seeds : {}".format(target_seeds))
        # work out which nodes are seeds in the target cluster
        target_hosts = self.host_map.keys()
        logging.info("target hosts : {}".format(target_hosts))

        if self.use_sstableloader is False:
            # stop all target nodes
            logging.info('Stopping Cassandra on all nodes currently up')

            # Generate a Job ID for this run
            job_id = str(uuid.uuid4())
            logging.debug('Job id is: {}'.format(job_id))
            # Define command to run
            command = self.config.cassandra.stop_cmd
            logging.debug('Command to run is: {}'.format(command))

            self._pssh_run(target_hosts, command, hosts_variables={})

        else:
            # we're using the sstableloader, which requires us to (re)create the schema and truncate the tables
            logging.info("Restoring schema on the target cluster")
            self._restore_schema()

        # trigger restores everywhere at once
        # pass in seed info so that non-seeds can wait for seeds before starting
        # seeds, naturally, don't wait for anything

        # build the restore command and per-host variables (backup sources and seed lists)
        hosts_variables = []
        for target, source in [(t, s['source'])
                               for t, s in self.host_map.items()]:
            logging.info('Restoring data on {}...'.format(target))
            seeds = '' if target in target_seeds or len(target_seeds) == 0 \
                    else '--seeds {}'.format(','.join(target_seeds))
            hosts_variables.append((','.join(source), seeds))
            command = self._build_restore_cmd(target, source, seeds)

        pssh_run_success = self._pssh_run(target_hosts,
                                          command,
                                          hosts_variables=hosts_variables)

        if not pssh_run_success:
            # we could implement a retry.
            err_msg = 'Some nodes failed to restore. Exiting'
            logging.error(err_msg)
            raise Exception(err_msg)

        logging.info(
            'Restore process is complete. The cluster should be up shortly.')

        if self.verify:
            verify_restore(target_hosts, self.config)
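In the pssh-based variant, seed information is folded into the restore command as a --seeds flag: seed nodes (and clusters with no declared seeds) get no flag, while non-seed nodes receive the comma-joined seed list so they can wait for the seeds before starting. A standalone sketch of that flag construction (the helper name build_seeds_flag is an assumption, not part of Medusa):

def build_seeds_flag(target, target_seeds):
    """Return the --seeds argument a non-seed node should pass to its restore command."""
    # seeds themselves, and clusters with no declared seeds, get no flag at all
    if target in target_seeds or len(target_seeds) == 0:
        return ''
    return '--seeds {}'.format(','.join(target_seeds))


assert build_seeds_flag('seed1', ['seed1', 'seed2']) == ''
assert build_seeds_flag('node3', ['seed1', 'seed2']) == '--seeds seed1,seed2'
assert build_seeds_flag('node3', []) == ''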
    def _restore_data(self):
        # create workdir on each target host
        # Later: distribute a credential
        # construct command for each target host
        # invoke `nohup medusa-wrapper #{command}` on each target host
        # wait for exit on each
        logging.info('Starting cluster restore...')
        logging.info('Working directory for this execution: {}'.format(self.work_dir))
        for target, sources in self.host_map.items():
            logging.info('About to restore on {} using {} as backup source'.format(target, sources))

        logging.info('This will delete all data on the target nodes and replace it with backup {}.'
                     .format(self.cluster_backup.name))

        proceed = None
        while (proceed != 'Y' and proceed != 'n') and not self.bypass_checks:
            proceed = input('Are you sure you want to proceed? (Y/n)')

        if proceed == 'n':
            err_msg = 'Restore manually cancelled'
            logging.error(err_msg)
            raise Exception(err_msg)

        if self.use_sstableloader is False:
            # stop all target nodes
            stop_remotes = []
            logging.info('Stopping Cassandra on all nodes')
            for target, source in [(t, s['source']) for t, s in self.host_map.items()]:
                client, connect_args = self._connect(target)
                if self.check_cassandra_running(target, client, connect_args):
                    logging.info('Cassandra is running on {}. Stopping it...'.format(target))
                    command = 'sh -c "{}"'.format(self.config.cassandra.stop_cmd)
                    stop_remotes.append(self._run(target, client, connect_args, command))
                else:
                    logging.info('Cassandra is not running on {}.'.format(target))

            # wait for all nodes to stop
            logging.info('Waiting for all nodes to stop...')
            finished, broken = self._wait_for(stop_remotes)
            if len(broken) > 0:
                err_msg = 'Some Cassandra nodes failed to stop. Exiting'
                logging.error(err_msg)
                raise Exception(err_msg)
        else:
            # we're using the sstableloader, which requires us to (re)create the schema and truncate the tables
            logging.info("Restoring schema on the target cluster")
            self._restore_schema()

        # work out which nodes are seeds in the target cluster
        target_seeds = [t for t, s in self.host_map.items() if s['seed']]

        # trigger restores everywhere at once
        # pass in seed info so that non-seeds can wait for seeds before starting
        # seeds, naturally, don't wait for anything
        remotes = []
        for target, source in [(t, s['source']) for t, s in self.host_map.items()]:
            logging.info('Restoring data on {}...'.format(target))
            seeds = None if target in target_seeds else target_seeds
            remote = self._trigger_restore(target, source, seeds=seeds)
            remotes.append(remote)

        # wait for the restores
        logging.info('Starting to wait for the nodes to restore')
        finished, broken = self._wait_for(remotes)
        if len(broken) > 0:
            err_msg = 'Some nodes failed to restore. Exiting'
            logging.error(err_msg)
            raise Exception(err_msg)

        logging.info('Restore process is complete. The cluster should be up shortly.')

        if self.verify:
            hosts = list(map(lambda r: r.target, remotes))
            verify_restore(hosts, self.config)
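Both _restore_data variants end with the same fan-out pattern: collect one handle per target host, wait on all of them, and abort if any come back broken. A small, hypothetical illustration of that partitioning step (the FakeRemote class and its wait() contract are assumptions made for the sketch; they are not Medusa's job objects):

class FakeRemote:
    """Hypothetical stand-in for a per-host job handle."""

    def __init__(self, target, exit_code):
        self.target = target
        self.exit_code = exit_code

    def wait(self):
        # pretend the remote command already ran and return its exit code
        return self.exit_code


def wait_for_sketch(remotes):
    """Partition remote jobs into (finished, broken) by exit code."""
    finished, broken = [], []
    for remote in remotes:
        (finished if remote.wait() == 0 else broken).append(remote)
    return finished, broken


finished, broken = wait_for_sketch([FakeRemote('node1', 0), FakeRemote('node2', 1)])
assert [r.target for r in finished] == ['node1']
assert [r.target for r in broken] == ['node2']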