def test_get_zk_node_ips(self):
    """Check get_zk_node_ips() for a readable and an unreadable source.

    With file_io.read and json.loads mocked to yield a locations mapping,
    the function is expected to return the "locations" list. When
    file_io.read raises IOError, it is expected to return an empty list.
    """
    # Successful read: json.loads is mocked, so the value handed back by
    # file_io.read is never actually parsed.
    flexmock(file_io).should_receive("read").and_return(
        {"locations": ["ip1", "ip2"], "last_updated_at": 0})
    flexmock(json).should_receive("loads").and_return(
        {"locations": [u'ip1', u'ip2'], "last_updated_at": 0})
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(appscale_info.get_zk_node_ips(), [u'ip1', u'ip2'])

    # Failed read: an IOError must be turned into an empty result.
    flexmock(file_io).should_receive("read").and_raise(IOError)
    self.assertEqual(appscale_info.get_zk_node_ips(), [])
def backup_data(path, keyname):
    """ Backup Zookeeper data to path.

    ZooKeeper is stopped on a single machine, its data directory is archived
    there, and the archive is copied to the local path. The temporary archive
    is removed and ZooKeeper is started again whether or not the backup
    succeeded.

    Args:
      path: A str, the name of the backup file to be created.
      keyname: A string containing the deployment's keyname.
    Raises:
      BRException if unable to find any ZooKeeper machines.
      SystemExit if the controller service is not running.
    """
    logging.info("Starting new zk backup.")

    # The backup requires a running deployment; bail out otherwise.
    running = subprocess.call(['service', CONTROLLER_SERVICE, 'status']) == 0
    if not running:
        logging.error('Please start AppScale before backing up ZooKeeper.')
        sys.exit(1)

    # Stop ZooKeeper and backup data on only one ZooKeeper machine.
    # This is to avoid downtime on deployments with multiple ZooKeeper
    # machines.
    zk_ips = appscale_info.get_zk_node_ips()
    if not zk_ips:
        raise BRException('Unable to find any ZooKeeper machines.')
    zk_ip = zk_ips[0]

    timestamp = int(time.time())
    backup_file = '{}/zk_backup_{}.tar.gz'.format(BACKUP_DIR_LOCATION,
                                                  timestamp)

    try:
        utils.ssh(zk_ip, keyname, 'monit stop -g zookeeper')
        utils.ssh(zk_ip, keyname,
                  'tar czf {} -C {} .'.format(backup_file, ZK_DATA_DIR))
        utils.scp_from(zk_ip, keyname, backup_file, path)
    finally:
        # Restarting ZooKeeper must not depend on the archive cleanup: if the
        # remote 'rm' fails, the nested finally still brings the service back.
        try:
            utils.ssh(zk_ip, keyname, 'rm -f {}'.format(backup_file))
        finally:
            utils.ssh(zk_ip, keyname, 'monit start -g zookeeper')
def get_node_info():
    """ Builds the node descriptors needed to run a backup/restore task on
    the current AppScale deployment.

    Returns:
      A list of dicts, each carrying NodeInfoTags.HOST, NodeInfoTags.ROLE and
      NodeInfoTags.INDEX. The DB master comes first (index None), followed by
      the DB slaves and then the ZooKeeper nodes, each group indexed from 0.
    """
    # TODO
    # Add logic for choosing minimal set of nodes that need to perform a task.
    # e.g. Only the node that owns the entire keyspace.
    master_entry = {
        NodeInfoTags.HOST: get_br_service_url(
            appscale_info.get_db_master_ip()),
        NodeInfoTags.ROLE: 'db_master',
        NodeInfoTags.INDEX: None
    }
    nodes = [master_entry]

    # Make sure we don't send the same request on DB roles that reside on the
    # same node: slaves matching a value of the master entry are dropped
    # before indices are assigned.
    slave_hosts = [get_br_service_url(slave_ip)
                   for slave_ip in appscale_info.get_db_slave_ips()]
    distinct_slaves = [slave_host for slave_host in slave_hosts
                       if slave_host not in master_entry.values()]
    for position, slave_host in enumerate(distinct_slaves):
        nodes.append({
            NodeInfoTags.HOST: slave_host,
            NodeInfoTags.ROLE: 'db_slave',
            NodeInfoTags.INDEX: position
        })

    for position, zk_ip in enumerate(appscale_info.get_zk_node_ips()):
        nodes.append({
            NodeInfoTags.HOST: get_br_service_url(zk_ip),
            NodeInfoTags.ROLE: 'zk',
            NodeInfoTags.INDEX: position
        })

    return nodes
def restore_data(path, keyname):
    """ Restores the Zookeeper snapshot.

    The backup archive is copied to every ZooKeeper machine, the nodes listed
    in ZK_KEEP_PATHS are dumped into an in-memory buffer, each machine's data
    directory is cleared and repopulated from the archive, and the buffered
    nodes are then written back before ZooKeeper is stopped again.

    Args:
      path: A str, the name of the backup file to restore from.
      keyname: A string containing the deployment's keyname.
    Returns:
      True once the restore has completed.
    Raises:
      BRException if unable to find any ZooKeeper machines.
      subprocess.CalledProcessError if preparing, clearing or restoring a
        machine fails with that error (re-raised after cleanup).
      SystemExit if the controller service is still running.
    """
    logging.info("Starting new zk restore.")

    # A restore is only allowed while the deployment is down.
    running = subprocess.call(['service', CONTROLLER_SERVICE, 'status']) == 0
    if running:
        logging.error('Please stop AppScale before restoring ZooKeeper.')
        sys.exit(1)

    zk_ips = appscale_info.get_zk_node_ips()
    if not zk_ips:
        raise BRException('Unable to find any ZooKeeper machines.')

    timestamp = int(time.time())
    restore_file = '{}/zk_restore_{}.tar.gz'.format(BACKUP_DIR_LOCATION,
                                                    timestamp)

    # Cache name of ZooKeeper service for each machine.
    zk_service_names = {}
    for zk_ip in zk_ips:
        zk_service_names[zk_ip] = utils.zk_service_name(zk_ip, keyname)

    # Copy restore file to and start ZooKeeper on relevant machines.
    logging.info('Copying data to ZooKeeper machines.')
    for zk_ip in zk_ips:
        zk_service = zk_service_names[zk_ip]
        try:
            utils.scp_to(zk_ip, keyname, path, restore_file)
            utils.ssh(zk_ip, keyname,
                      'service {} restart'.format(zk_service))
        except subprocess.CalledProcessError:
            logging.exception('Failed to prepare restore on {}'.format(zk_ip))
            utils.ssh(zk_ip, keyname, 'rm -f {}'.format(restore_file))
            utils.ssh(zk_ip, keyname, 'service {} stop'.format(zk_service))
            # Bare raise keeps the original traceback.
            raise

    # Both client connections use the same host string so the port always
    # comes from zktransaction.DEFAULT_PORT rather than a literal.
    zk_hosts = ','.join(
        '{}:{}'.format(zk_ip, zktransaction.DEFAULT_PORT)
        for zk_ip in zk_ips)

    deployment_data = StringIO()
    try:
        # Save deployment-specific data.
        zk = kazoo.client.KazooClient(hosts=zk_hosts)
        zk.start()
        try:
            for zk_node in ZK_KEEP_PATHS:
                recursive_dump(zk, zk_node, deployment_data)
        finally:
            # Release the connection even if the dump fails.
            zk.stop()

        # Stop ZooKeeper and clear existing data directory.
        logging.info('Clearing existing data on ZooKeeper machines.')
        for zk_ip in zk_ips:
            zk_service = zk_service_names[zk_ip]
            try:
                utils.ssh(zk_ip, keyname,
                          'service {} stop'.format(zk_service))
                utils.ssh(zk_ip, keyname,
                          'rm -rf {}/*'.format(ZK_DATA_DIR))
            except subprocess.CalledProcessError:
                logging.exception('Unable to clear data on {}'.format(zk_ip))
                utils.ssh(zk_ip, keyname, 'rm -f {}'.format(restore_file))
                utils.ssh(zk_ip, keyname,
                          'service {} stop'.format(zk_service))
                raise

        # Restore data and restart ZooKeeper on relevant machines.
        logging.info('Restoring data on ZooKeeper machines.')
        for zk_ip in zk_ips:
            zk_service = zk_service_names[zk_ip]
            try:
                utils.ssh(zk_ip, keyname,
                          'tar xzf {} -C {}'.format(restore_file,
                                                    ZK_DATA_DIR))
                utils.ssh(zk_ip, keyname,
                          'service {} start'.format(zk_service))
            except subprocess.CalledProcessError:
                logging.exception('Unable to restore on {}'.format(zk_ip))
                utils.ssh(zk_ip, keyname, 'rm -f {}'.format(restore_file))
                utils.ssh(zk_ip, keyname,
                          'service {} stop'.format(zk_service))
                raise

        # Restore deployment-specific data.
        logging.info('Restoring deployment-specific data.')
        zk = kazoo.client.KazooClient(hosts=zk_hosts)
        zk.start()
        try:
            for zk_node in ZK_KEEP_PATHS:
                recursive_flush(zk, zk_node)
            # Rewind so restore_zk reads the dump from its beginning.
            deployment_data.seek(0)
            restore_zk(zk, deployment_data)
        finally:
            zk.stop()

        # Stop ZooKeeper on relevant machines.
        logging.info('Stopping ZooKeeper.')
        for zk_ip in zk_ips:
            zk_service = zk_service_names[zk_ip]
            utils.ssh(zk_ip, keyname, 'service {} stop'.format(zk_service))
            utils.ssh(zk_ip, keyname, 'rm -rf {}'.format(restore_file))
    finally:
        # Single release point for the buffer on every exit path.
        deployment_data.close()

    logging.info("Done with zk restore.")
    return True
def restore_data(path, keyname):
    """ Restores the Zookeeper snapshot.

    The backup archive is copied to every ZooKeeper machine, the nodes listed
    in ZK_KEEP_PATHS are dumped into an in-memory buffer, each machine's data
    directory is cleared and repopulated from the archive, and the buffered
    nodes are then written back before ZooKeeper is stopped again.

    Args:
      path: A str, the name of the backup file to restore from.
      keyname: A string containing the deployment's keyname.
    Returns:
      True once the restore has completed.
    Raises:
      BRException if unable to find any ZooKeeper machines.
      subprocess.CalledProcessError if preparing, clearing or restoring a
        machine fails with that error (re-raised after cleanup).
      SystemExit if the controller service is still running.
    """
    logging.info("Starting new zk restore.")

    # A restore is only allowed while the deployment is down.
    running = subprocess.call(['service', CONTROLLER_SERVICE, 'status']) == 0
    if running:
        logging.error('Please stop AppScale before restoring ZooKeeper.')
        sys.exit(1)

    zk_ips = appscale_info.get_zk_node_ips()
    if not zk_ips:
        raise BRException('Unable to find any ZooKeeper machines.')

    timestamp = int(time.time())
    restore_file = '{}/zk_restore_{}.tar.gz'.format(BACKUP_DIR_LOCATION,
                                                    timestamp)

    # Cache name of ZooKeeper service for each machine.
    zk_service_names = {}
    for zk_ip in zk_ips:
        zk_service_names[zk_ip] = utils.zk_service_name(zk_ip, keyname)

    # Copy restore file to and start ZooKeeper on relevant machines.
    logging.info('Copying data to ZooKeeper machines.')
    for zk_ip in zk_ips:
        zk_service = zk_service_names[zk_ip]
        try:
            utils.scp_to(zk_ip, keyname, path, restore_file)
            utils.ssh(zk_ip, keyname,
                      'service {} restart'.format(zk_service))
        except subprocess.CalledProcessError:
            logging.exception('Failed to prepare restore on {}'.format(zk_ip))
            utils.ssh(zk_ip, keyname, 'rm -f {}'.format(restore_file))
            utils.ssh(zk_ip, keyname, 'service {} stop'.format(zk_service))
            # Bare raise keeps the original traceback.
            raise

    # Both client connections use the same host string so the port always
    # comes from zktransaction.DEFAULT_PORT rather than a literal.
    zk_hosts = ','.join(
        '{}:{}'.format(zk_ip, zktransaction.DEFAULT_PORT)
        for zk_ip in zk_ips)

    deployment_data = StringIO()
    try:
        # Save deployment-specific data.
        zk = kazoo.client.KazooClient(hosts=zk_hosts)
        zk.start()
        try:
            for zk_node in ZK_KEEP_PATHS:
                recursive_dump(zk, zk_node, deployment_data)
        finally:
            # Release the connection even if the dump fails.
            zk.stop()

        # Stop ZooKeeper and clear existing data directory.
        logging.info('Clearing existing data on ZooKeeper machines.')
        for zk_ip in zk_ips:
            zk_service = zk_service_names[zk_ip]
            try:
                utils.ssh(zk_ip, keyname,
                          'service {} stop'.format(zk_service))
                utils.ssh(zk_ip, keyname,
                          'rm -rf {}/*'.format(ZK_DATA_DIR))
            except subprocess.CalledProcessError:
                logging.exception('Unable to clear data on {}'.format(zk_ip))
                utils.ssh(zk_ip, keyname, 'rm -f {}'.format(restore_file))
                utils.ssh(zk_ip, keyname,
                          'service {} stop'.format(zk_service))
                raise

        # Restore data and restart ZooKeeper on relevant machines.
        logging.info('Restoring data on ZooKeeper machines.')
        for zk_ip in zk_ips:
            zk_service = zk_service_names[zk_ip]
            try:
                utils.ssh(zk_ip, keyname,
                          'tar xzf {} -C {}'.format(restore_file,
                                                    ZK_DATA_DIR))
                utils.ssh(zk_ip, keyname,
                          'service {} start'.format(zk_service))
            except subprocess.CalledProcessError:
                logging.exception('Unable to restore on {}'.format(zk_ip))
                utils.ssh(zk_ip, keyname, 'rm -f {}'.format(restore_file))
                utils.ssh(zk_ip, keyname,
                          'service {} stop'.format(zk_service))
                raise

        # Restore deployment-specific data.
        logging.info('Restoring deployment-specific data.')
        zk = kazoo.client.KazooClient(hosts=zk_hosts)
        zk.start()
        try:
            for zk_node in ZK_KEEP_PATHS:
                recursive_flush(zk, zk_node)
            # Rewind so restore_zk reads the dump from its beginning.
            deployment_data.seek(0)
            restore_zk(zk, deployment_data)
        finally:
            zk.stop()

        # Stop ZooKeeper on relevant machines.
        logging.info('Stopping ZooKeeper.')
        for zk_ip in zk_ips:
            zk_service = zk_service_names[zk_ip]
            utils.ssh(zk_ip, keyname, 'service {} stop'.format(zk_service))
            utils.ssh(zk_ip, keyname, 'rm -rf {}'.format(restore_file))
    finally:
        # Single release point for the buffer on every exit path.
        deployment_data.close()

    logging.info("Done with zk restore.")
    return True