def restore_node(config, temp_dir, backup_name, in_place, keep_auth, seeds, verify, keyspaces, tables, use_sstableloader=False):
    """Restore one node from a named backup.

    Dispatches either to the local restore path or to the sstableloader
    path, then optionally verifies the restored node.

    :param config: medusa configuration object (provides ``config.storage``)
    :param temp_dir: working directory for the restore
    :param backup_name: name of the backup to restore
    :param in_place: restore onto the node the backup came from
    :param keep_auth: preserve the existing system_auth keyspace
    :param seeds: seed nodes to wait for before starting
    :param verify: run a post-restore verification when True
    :param keyspaces: keyspaces to restore
    :param tables: tables to restore
    :param use_sstableloader: restore via sstableloader instead of locally
    """
    # Restoring in place would overwrite the live system_auth keyspace,
    # so keeping it is contradictory; bail out early.
    if in_place and keep_auth:
        logging.error('Cannot keep system_auth when restoring in-place. It would be overwritten')
        sys.exit(1)

    storage = Storage(config=config.storage)

    # Same argument list either way — only the strategy differs.
    restore_fn = restore_node_sstableloader if use_sstableloader else restore_node_locally
    restore_fn(config, temp_dir, backup_name, in_place, keep_auth, seeds, storage, keyspaces, tables)

    if verify:
        # Verify against this machine's own fully-qualified domain name.
        verify_restore([socket.getfqdn()], config)
def restore_node(config, temp_dir, backup_name, in_place, keep_auth, seeds, verify, keyspaces, tables, use_sstableloader=False):
    """Restore one node from a named backup.

    Dispatches either to the local restore path or to the sstableloader
    path, then optionally verifies the restored node.

    :param config: medusa configuration object (provides ``config.storage``
        and ``config.cassandra``)
    :param temp_dir: working directory for the restore
    :param backup_name: name of the backup to restore
    :param in_place: restore onto the node the backup came from
    :param keep_auth: preserve the existing system_auth keyspace
    :param seeds: seed nodes to wait for before starting
    :param verify: run a post-restore verification when True
    :param keyspaces: keyspaces to restore
    :param tables: tables to restore
    :param use_sstableloader: restore via sstableloader instead of locally
    """
    # Restoring in place would overwrite the live system_auth keyspace,
    # so keeping it is contradictory; bail out early.
    if in_place and keep_auth:
        logging.error('Cannot keep system_auth when restoring in-place. It would be overwritten')
        sys.exit(1)

    storage = Storage(config=config.storage)

    # Same argument list either way — only the strategy differs.
    restore_fn = restore_node_sstableloader if use_sstableloader else restore_node_locally
    restore_fn(config, temp_dir, backup_name, in_place, keep_auth, seeds, storage, keyspaces, tables)

    if verify:
        # Resolve this node's own name through the project resolver so the
        # ip-address-vs-hostname behavior follows the configuration.
        resolver = HostnameResolver(
            medusa.config.evaluate_boolean(config.cassandra.resolve_ip_addresses))
        verify_restore([resolver.resolve_fqdn()], config)
def _restore_data(self):
    """Orchestrate a full-cluster restore via parallel SSH (pssh).

    Confirms interactively (unless ``self.bypass_checks``), stops Cassandra
    on every target (or recreates the schema when using sstableloader),
    then fans out per-host restore commands in one ``_pssh_run`` call.

    Raises:
        Exception: if the operator cancels, or any node fails to restore.
    """
    # create workdir on each target host
    # Later: distribute a credential
    # construct command for each target host
    # invoke `nohup medusa-wrapper #{command}` on each target host
    # wait for exit on each
    logging.info('Starting cluster restore...')
    logging.info('Working directory for this execution: {}'.format(
        self.work_dir))
    # Log the full target -> source mapping so the operator can review it
    # before confirming the destructive step below.
    for target, sources in self.host_map.items():
        logging.info(
            'About to restore on {} using {} as backup source'.format(
                target, sources))
    logging.info(
        'This will delete all data on the target nodes and replace it with backup {}.'
        .format(self.cluster_backup.name))
    # Interactive confirmation loop; only exact 'Y' or 'n' are accepted.
    proceed = None
    while (proceed != 'Y' and proceed != 'n') and not self.bypass_checks:
        proceed = input('Are you sure you want to proceed? (Y/n)')
    if proceed == 'n':
        err_msg = 'Restore manually cancelled'
        logging.error(err_msg)
        raise Exception(err_msg)
    # work out which nodes are seeds in the target cluster
    target_seeds = [t for t, s in self.host_map.items() if s['seed']]
    logging.info("target seeds : {}".format(target_seeds))
    # collect every target host (seed or not) for the fan-out below
    target_hosts = self.host_map.keys()
    logging.info("target hosts : {}".format(target_hosts))
    if self.use_sstableloader is False:
        # stop all target nodes
        logging.info('Stopping Cassandra on all nodes currently up')
        # Generate a Job ID for this run
        job_id = str(uuid.uuid4())
        logging.debug('Job id is: {}'.format(job_id))
        # Define command to run
        command = self.config.cassandra.stop_cmd
        logging.debug('Command to run is: {}'.format(command))
        self._pssh_run(target_hosts, command, hosts_variables={})
    else:
        # we're using the sstableloader, which will require to (re)create
        # the schema and empty the tables
        logging.info("Restoring schema on the target cluster")
        self._restore_schema()
    # trigger restores everywhere at once
    # pass in seed info so that non-seeds can wait for seeds before starting
    # seeds, naturally, don't wait for anything
    # Generate a Job ID for this run
    hosts_variables = []
    for target, source in [(t, s['source']) for t, s in self.host_map.items()]:
        logging.info('Restoring data on {}...'.format(target))
        # Seeds (and clusters with no seeds) get no --seeds flag: they must
        # not wait on anyone. Non-seeds wait for the listed seeds.
        seeds = '' if target in target_seeds or len(target_seeds) == 0 \
            else '--seeds {}'.format(','.join(target_seeds))
        hosts_variables.append((','.join(source), seeds))
        # NOTE(review): command is rebuilt each iteration but only the last
        # value reaches _pssh_run below — presumably _pssh_run templates the
        # per-host differences from hosts_variables; confirm.
        command = self._build_restore_cmd(target, source, seeds)
    # Single parallel invocation across all targets at once.
    pssh_run_success = self._pssh_run(target_hosts,
                                      command,
                                      hosts_variables=hosts_variables)
    if not pssh_run_success:
        # we could implement a retry.
        err_msg = 'Some nodes failed to restore. Exiting'
        logging.error(err_msg)
        raise Exception(err_msg)
    logging.info(
        'Restore process is complete. The cluster should be up shortly.')
    if self.verify:
        verify_restore(target_hosts, self.config)
def _restore_data(self):
    """Orchestrate a full-cluster restore over per-host SSH connections.

    Confirms interactively (unless ``self.bypass_checks``), stops Cassandra
    host by host (or recreates the schema when using sstableloader), then
    triggers one remote restore per target and waits for all of them.

    Raises:
        Exception: if the operator cancels, any node fails to stop, or any
            node fails to restore.
    """
    # create workdir on each target host
    # Later: distribute a credential
    # construct command for each target host
    # invoke `nohup medusa-wrapper #{command}` on each target host
    # wait for exit on each
    logging.info('Starting cluster restore...')
    logging.info('Working directory for this execution: {}'.format(self.work_dir))
    # Log the full target -> source mapping so the operator can review it
    # before confirming the destructive step below.
    for target, sources in self.host_map.items():
        logging.info('About to restore on {} using {} as backup source'.format(target, sources))
    logging.info('This will delete all data on the target nodes and replace it with backup {}.'
                 .format(self.cluster_backup.name))
    # Interactive confirmation loop; only exact 'Y' or 'n' are accepted.
    proceed = None
    while (proceed != 'Y' and proceed != 'n') and not self.bypass_checks:
        proceed = input('Are you sure you want to proceed? (Y/n)')
    if proceed == 'n':
        err_msg = 'Restore manually cancelled'
        logging.error(err_msg)
        raise Exception(err_msg)
    if self.use_sstableloader is False:
        # stop all target nodes
        stop_remotes = []
        logging.info('Stopping Cassandra on all nodes')
        for target, source in [(t, s['source']) for t, s in self.host_map.items()]:
            client, connect_args = self._connect(target)
            # Only issue the stop command where Cassandra is actually up.
            if self.check_cassandra_running(target, client, connect_args):
                logging.info('Cassandra is running on {}. Stopping it...'.format(target))
                command = 'sh -c "{}"'.format(self.config.cassandra.stop_cmd)
                stop_remotes.append(self._run(target, client, connect_args, command))
            else:
                logging.info('Cassandra is not running on {}.'.format(target))
        # wait for all nodes to stop
        logging.info('Waiting for all nodes to stop...')
        finished, broken = self._wait_for(stop_remotes)
        if len(broken) > 0:
            err_msg = 'Some Cassandras failed to stop. Exiting'
            logging.error(err_msg)
            raise Exception(err_msg)
    else:
        # we're using the sstableloader, which will require to (re)create
        # the schema and empty the tables
        logging.info("Restoring schema on the target cluster")
        self._restore_schema()
    # work out which nodes are seeds in the target cluster
    target_seeds = [t for t, s in self.host_map.items() if s['seed']]
    # trigger restores everywhere at once
    # pass in seed info so that non-seeds can wait for seeds before starting
    # seeds, naturally, don't wait for anything
    remotes = []
    for target, source in [(t, s['source']) for t, s in self.host_map.items()]:
        logging.info('Restoring data on {}...'.format(target))
        # Seeds get None (wait for nothing); non-seeds get the seed list.
        seeds = None if target in target_seeds else target_seeds
        remote = self._trigger_restore(target, source, seeds=seeds)
        remotes.append(remote)
    # wait for the restores
    logging.info('Starting to wait for the nodes to restore')
    finished, broken = self._wait_for(remotes)
    if len(broken) > 0:
        err_msg = 'Some nodes failed to restore. Exiting'
        logging.error(err_msg)
        raise Exception(err_msg)
    logging.info('Restore process is complete. The cluster should be up shortly.')
    if self.verify:
        # Verify every host we triggered a restore on; r.target presumably
        # carries the hostname — NOTE(review): confirm against _trigger_restore.
        hosts = list(map(lambda r: r.target, remotes))
        verify_restore(hosts, self.config)