Пример #1
0
  def test_get_zk_node_ips(self):
    """ Checks get_zk_node_ips for a readable and an unreadable file. """
    # Both the file read and json.loads are mocked, so the parsed dictionary
    # below is what get_zk_node_ips is expected to take its locations from.
    flexmock(file_io).should_receive("read").\
      and_return({"locations":["ip1", "ip2"],"last_updated_at":0})
    flexmock(json).should_receive("loads").\
      and_return({"locations":[u'ip1', u'ip2'],"last_updated_at":0})
    self.assertEqual(appscale_info.get_zk_node_ips(), [u'ip1', u'ip2'])

    # An IOError while reading is expected to produce an empty list.
    flexmock(file_io).should_receive("read").and_raise(IOError)
    self.assertEqual(appscale_info.get_zk_node_ips(), [])
Пример #2
0
    def test_get_zk_node_ips(self):
        """ Checks get_zk_node_ips for a readable and an unreadable file. """
        # Both the file read and json.loads are mocked, so the parsed
        # dictionary below is what get_zk_node_ips is expected to take its
        # locations from.
        flexmock(file_io).should_receive("read").\
          and_return({"locations":["ip1", "ip2"],"last_updated_at":0})
        flexmock(json).should_receive("loads").\
          and_return({"locations":[u'ip1', u'ip2'],"last_updated_at":0})
        self.assertEqual(appscale_info.get_zk_node_ips(), [u'ip1', u'ip2'])

        # An IOError while reading is expected to produce an empty list.
        flexmock(file_io).should_receive("read").and_raise(IOError)
        self.assertEqual(appscale_info.get_zk_node_ips(), [])
Пример #3
0
def backup_data(path, keyname):
  """ Backup Zookeeper data to path.

  ZooKeeper is stopped on the first machine returned by
  appscale_info.get_zk_node_ips(), its data directory is archived and
  fetched, and ZooKeeper is started again on that machine afterwards. The
  process exits with status 1 if the controller service is not running.

  Args:
    path: A str, the name of the backup file to be created.
    keyname: A string containing the deployment's keyname.
  Raises:
    BRException if unable to find any ZooKeeper machines.
  """
  logging.info("Starting new zk backup.")

  running = subprocess.call(['service', CONTROLLER_SERVICE, 'status']) == 0
  if not running:
    logging.error('Please start AppScale before backing up ZooKeeper.')
    sys.exit(1)

  # Stop ZooKeeper and backup data on only one ZooKeeper machine.
  # This is to avoid downtime on deployments with multiple ZooKeeper machines.
  zk_ips = appscale_info.get_zk_node_ips()
  if not zk_ips:
    raise BRException('Unable to find any ZooKeeper machines.')
  zk_ip = zk_ips[0]

  timestamp = int(time.time())
  backup_file = '{}/zk_backup_{}.tar.gz'.format(BACKUP_DIR_LOCATION, timestamp)
  try:
    utils.ssh(zk_ip, keyname, 'monit stop -g zookeeper')
    utils.ssh(zk_ip, keyname,
      'tar czf {} -C {} .'.format(backup_file, ZK_DATA_DIR))
    utils.scp_from(zk_ip, keyname, backup_file, path)
  finally:
    try:
      utils.ssh(zk_ip, keyname, 'rm -f {}'.format(backup_file))
    finally:
      # ZooKeeper must be started again even when removing the temporary
      # archive fails; otherwise the machine would be left without it.
      utils.ssh(zk_ip, keyname, 'monit start -g zookeeper')
Пример #4
0
def backup_data(path, keyname):
    """ Backup Zookeeper data to path.

    ZooKeeper is stopped on the first machine returned by
    appscale_info.get_zk_node_ips(), its data directory is archived and
    fetched, and ZooKeeper is started again on that machine afterwards. The
    process exits with status 1 if the controller service is not running.

    Args:
      path: A str, the name of the backup file to be created.
      keyname: A string containing the deployment's keyname.
    Raises:
      BRException if unable to find any ZooKeeper machines.
    """
    logging.info("Starting new zk backup.")

    running = subprocess.call(['service', CONTROLLER_SERVICE, 'status']) == 0
    if not running:
        logging.error('Please start AppScale before backing up ZooKeeper.')
        sys.exit(1)

    # Stop ZooKeeper and backup data on only one ZooKeeper machine.
    # This is to avoid downtime on deployments with multiple ZooKeeper machines.
    zk_ips = appscale_info.get_zk_node_ips()
    if not zk_ips:
        raise BRException('Unable to find any ZooKeeper machines.')
    zk_ip = zk_ips[0]

    timestamp = int(time.time())
    backup_file = '{}/zk_backup_{}.tar.gz'.format(BACKUP_DIR_LOCATION,
                                                  timestamp)
    try:
        utils.ssh(zk_ip, keyname, 'monit stop -g zookeeper')
        utils.ssh(zk_ip, keyname,
                  'tar czf {} -C {} .'.format(backup_file, ZK_DATA_DIR))
        utils.scp_from(zk_ip, keyname, backup_file, path)
    finally:
        try:
            utils.ssh(zk_ip, keyname, 'rm -f {}'.format(backup_file))
        finally:
            # ZooKeeper must be started again even when removing the
            # temporary archive fails; otherwise the machine would be left
            # without it.
            utils.ssh(zk_ip, keyname, 'monit start -g zookeeper')
Пример #5
0
def get_node_info():
    """ Creates a list of JSON objects that contain node information and are
    needed to perform a backup/restore task on the current AppScale deployment.

    Returns:
      A list of dicts keyed by NodeInfoTags.HOST, NodeInfoTags.ROLE and
      NodeInfoTags.INDEX. The 'db_master' entry comes first with a None
      index, followed by the 'db_slave' entries and then the 'zk' entries,
      each numbered from 0 within its own role.
    """

    # TODO
    # Add logic for choosing minimal set of nodes that need to perform a task.
    # e.g. Only the node that owns the entire keyspace.

    master_host = get_br_service_url(appscale_info.get_db_master_ip())
    nodes = [{
        NodeInfoTags.HOST: master_host,
        NodeInfoTags.ROLE: 'db_master',
        NodeInfoTags.INDEX: None
    }]

    # Make sure we don't send the same request on DB roles that reside on the
    # same node: slaves hosted on the master's machine are skipped, and the
    # remaining slaves are numbered consecutively.
    slave_hosts = [get_br_service_url(slave_ip)
                   for slave_ip in appscale_info.get_db_slave_ips()]
    distinct_slaves = [host for host in slave_hosts if host != master_host]
    for index, host in enumerate(distinct_slaves):
        nodes.append({
            NodeInfoTags.HOST: host,
            NodeInfoTags.ROLE: 'db_slave',
            NodeInfoTags.INDEX: index
        })

    for index, zk_ip in enumerate(appscale_info.get_zk_node_ips()):
        nodes.append({
            NodeInfoTags.HOST: get_br_service_url(zk_ip),
            NodeInfoTags.ROLE: 'zk',
            NodeInfoTags.INDEX: index
        })

    return nodes
Пример #6
0
def get_node_info():
  """ Creates a list of JSON objects that contain node information and are
  needed to perform a backup/restore task on the current AppScale deployment.

  Returns:
    A list of dicts keyed by NodeInfoTags.HOST, NodeInfoTags.ROLE and
    NodeInfoTags.INDEX. The 'db_master' entry comes first with a None index,
    followed by the 'db_slave' entries and then the 'zk' entries, each
    numbered from 0 within its own role.
  """

  # TODO
  # Add logic for choosing minimal set of nodes that need to perform a task.
  # e.g. Only the node that owns the entire keyspace.

  master_host = get_br_service_url(appscale_info.get_db_master_ip())
  nodes = [{
    NodeInfoTags.HOST: master_host,
    NodeInfoTags.ROLE: 'db_master',
    NodeInfoTags.INDEX: None
  }]

  # Make sure we don't send the same request on DB roles that reside on the
  # same node: slaves hosted on the master's machine are skipped, and the
  # remaining slaves are numbered consecutively.
  slave_hosts = [get_br_service_url(slave_ip)
                 for slave_ip in appscale_info.get_db_slave_ips()]
  distinct_slaves = [host for host in slave_hosts if host != master_host]
  for index, host in enumerate(distinct_slaves):
    nodes.append({
      NodeInfoTags.HOST: host,
      NodeInfoTags.ROLE: 'db_slave',
      NodeInfoTags.INDEX: index
    })

  for index, zk_ip in enumerate(appscale_info.get_zk_node_ips()):
    nodes.append({
      NodeInfoTags.HOST: get_br_service_url(zk_ip),
      NodeInfoTags.ROLE: 'zk',
      NodeInfoTags.INDEX: index
    })

  return nodes
Пример #7
0
def restore_data(path, keyname):
  """ Restores the Zookeeper snapshot.

  The backup file is copied to every ZooKeeper machine, the nodes listed in
  ZK_KEEP_PATHS are dumped into memory, each machine's data directory is
  replaced with the contents of the backup, and the dumped nodes are then
  written back. ZooKeeper is stopped on every machine before returning. The
  process exits with status 1 if the controller service is running.

  Args:
    path: A str, the name of the backup file to restore from.
    keyname: A string containing the deployment's keyname.
  Returns:
    True once the restore has completed.
  Raises:
    BRException if unable to find any ZooKeeper machines.
    subprocess.CalledProcessError if one is raised while preparing, clearing
      or restoring a machine; it is re-raised after cleaning up that machine.
  """
  logging.info("Starting new zk restore.")

  running = subprocess.call(['service', CONTROLLER_SERVICE, 'status']) == 0
  if running:
    logging.error('Please stop AppScale before restoring ZooKeeper.')
    sys.exit(1)

  zk_ips = appscale_info.get_zk_node_ips()
  if not zk_ips:
    raise BRException('Unable to find any ZooKeeper machines.')

  timestamp = int(time.time())
  restore_file = '{}/zk_restore_{}.tar.gz'.format(
    BACKUP_DIR_LOCATION, timestamp)

  # Cache name of ZooKeeper service for each machine.
  zk_service_names = {
    zk_ip: utils.zk_service_name(zk_ip, keyname) for zk_ip in zk_ips}

  # Copy restore file to and start ZooKeeper on relevant machines.
  logging.info('Copying data to ZooKeeper machines.')
  for zk_ip in zk_ips:
    zk_service = zk_service_names[zk_ip]
    try:
      utils.scp_to(zk_ip, keyname, path, restore_file)
      utils.ssh(zk_ip, keyname, 'service {} restart'.format(zk_service))
    except subprocess.CalledProcessError as error:
      logging.exception('Failed to prepare restore on {}'.format(zk_ip))
      utils.ssh(zk_ip, keyname, 'rm -f {}'.format(restore_file))
      utils.ssh(zk_ip, keyname, 'service {} stop'.format(zk_service))
      raise error

  # Both ZooKeeper clients below talk to the same machines, so they share a
  # single connection string built from the default port.
  zk_hosts = ','.join(
    '{}:{}'.format(zk_ip, zktransaction.DEFAULT_PORT) for zk_ip in zk_ips)

  # The in-memory buffer is closed exactly once, whichever step fails.
  deployment_data = StringIO()
  try:
    # Save deployment-specific data.
    zk = kazoo.client.KazooClient(hosts=zk_hosts)
    zk.start()
    try:
      for zk_node in ZK_KEEP_PATHS:
        recursive_dump(zk, zk_node, deployment_data)
    finally:
      # Stop the client even if dumping a node fails.
      zk.stop()

    # Stop ZooKeeper and clear existing data directory.
    logging.info('Clearing existing data on ZooKeeper machines.')
    for zk_ip in zk_ips:
      zk_service = zk_service_names[zk_ip]
      try:
        utils.ssh(zk_ip, keyname, 'service {} stop'.format(zk_service))
        utils.ssh(zk_ip, keyname, 'rm -rf {}/*'.format(ZK_DATA_DIR))
      except subprocess.CalledProcessError as error:
        logging.exception('Unable to clear data on {}'.format(zk_ip))
        utils.ssh(zk_ip, keyname, 'rm -f {}'.format(restore_file))
        utils.ssh(zk_ip, keyname, 'service {} stop'.format(zk_service))
        raise error

    # Restore data and restart ZooKeeper on relevant machines.
    logging.info('Restoring data on ZooKeeper machines.')
    for zk_ip in zk_ips:
      zk_service = zk_service_names[zk_ip]
      try:
        utils.ssh(zk_ip, keyname,
          'tar xzf {} -C {}'.format(restore_file, ZK_DATA_DIR))
        utils.ssh(zk_ip, keyname, 'service {} start'.format(zk_service))
      except subprocess.CalledProcessError as error:
        logging.exception('Unable to restore on {}'.format(zk_ip))
        utils.ssh(zk_ip, keyname, 'rm -f {}'.format(restore_file))
        utils.ssh(zk_ip, keyname, 'service {} stop'.format(zk_service))
        raise error

    # Restore deployment-specific data.
    logging.info('Restoring deployment-specific data.')
    zk = kazoo.client.KazooClient(hosts=zk_hosts)
    zk.start()
    try:
      for zk_node in ZK_KEEP_PATHS:
        recursive_flush(zk, zk_node)
      deployment_data.seek(0)
      restore_zk(zk, deployment_data)
    finally:
      # Stop the client even if flushing or restoring fails.
      zk.stop()

    # Stop ZooKeeper on relevant machines.
    logging.info('Stopping ZooKeeper.')
    for zk_ip in zk_ips:
      zk_service = zk_service_names[zk_ip]
      utils.ssh(zk_ip, keyname, 'service {} stop'.format(zk_service))
      utils.ssh(zk_ip, keyname, 'rm -rf {}'.format(restore_file))
  finally:
    deployment_data.close()

  logging.info("Done with zk restore.")
  return True
Пример #8
0
def restore_data(path, keyname):
    """ Restores the Zookeeper snapshot.

    The backup file is copied to every ZooKeeper machine, the nodes listed in
    ZK_KEEP_PATHS are dumped into memory, each machine's data directory is
    replaced with the contents of the backup, and the dumped nodes are then
    written back. ZooKeeper is stopped on every machine before returning.
    The process exits with status 1 if the controller service is running.

    Args:
      path: A str, the name of the backup file to restore from.
      keyname: A string containing the deployment's keyname.
    Returns:
      True once the restore has completed.
    Raises:
      BRException if unable to find any ZooKeeper machines.
      subprocess.CalledProcessError if one is raised while preparing,
        clearing or restoring a machine; it is re-raised after cleaning up
        that machine.
    """
    logging.info("Starting new zk restore.")

    running = subprocess.call(['service', CONTROLLER_SERVICE, 'status']) == 0
    if running:
        logging.error('Please stop AppScale before restoring ZooKeeper.')
        sys.exit(1)

    zk_ips = appscale_info.get_zk_node_ips()
    if not zk_ips:
        raise BRException('Unable to find any ZooKeeper machines.')

    timestamp = int(time.time())
    restore_file = '{}/zk_restore_{}.tar.gz'.format(
        BACKUP_DIR_LOCATION, timestamp)

    # Cache name of ZooKeeper service for each machine.
    zk_service_names = {
        zk_ip: utils.zk_service_name(zk_ip, keyname) for zk_ip in zk_ips}

    # Copy restore file to and start ZooKeeper on relevant machines.
    logging.info('Copying data to ZooKeeper machines.')
    for zk_ip in zk_ips:
        zk_service = zk_service_names[zk_ip]
        try:
            utils.scp_to(zk_ip, keyname, path, restore_file)
            utils.ssh(zk_ip, keyname, 'service {} restart'.format(zk_service))
        except subprocess.CalledProcessError as error:
            logging.exception('Failed to prepare restore on {}'.format(zk_ip))
            utils.ssh(zk_ip, keyname, 'rm -f {}'.format(restore_file))
            utils.ssh(zk_ip, keyname, 'service {} stop'.format(zk_service))
            raise error

    # Both ZooKeeper clients below talk to the same machines, so they share a
    # single connection string built from the default port.
    zk_hosts = ','.join(
        '{}:{}'.format(zk_ip, zktransaction.DEFAULT_PORT) for zk_ip in zk_ips)

    # The in-memory buffer is closed exactly once, whichever step fails.
    deployment_data = StringIO()
    try:
        # Save deployment-specific data.
        zk = kazoo.client.KazooClient(hosts=zk_hosts)
        zk.start()
        try:
            for zk_node in ZK_KEEP_PATHS:
                recursive_dump(zk, zk_node, deployment_data)
        finally:
            # Stop the client even if dumping a node fails.
            zk.stop()

        # Stop ZooKeeper and clear existing data directory.
        logging.info('Clearing existing data on ZooKeeper machines.')
        for zk_ip in zk_ips:
            zk_service = zk_service_names[zk_ip]
            stop_command = 'service {} stop'.format(zk_service)
            try:
                utils.ssh(zk_ip, keyname, stop_command)
                utils.ssh(zk_ip, keyname, 'rm -rf {}/*'.format(ZK_DATA_DIR))
            except subprocess.CalledProcessError as error:
                logging.exception('Unable to clear data on {}'.format(zk_ip))
                utils.ssh(zk_ip, keyname, 'rm -f {}'.format(restore_file))
                utils.ssh(zk_ip, keyname, stop_command)
                raise error

        # Restore data and restart ZooKeeper on relevant machines.
        logging.info('Restoring data on ZooKeeper machines.')
        for zk_ip in zk_ips:
            zk_service = zk_service_names[zk_ip]
            try:
                utils.ssh(zk_ip, keyname,
                          'tar xzf {} -C {}'.format(restore_file, ZK_DATA_DIR))
                utils.ssh(zk_ip, keyname,
                          'service {} start'.format(zk_service))
            except subprocess.CalledProcessError as error:
                logging.exception('Unable to restore on {}'.format(zk_ip))
                utils.ssh(zk_ip, keyname, 'rm -f {}'.format(restore_file))
                utils.ssh(zk_ip, keyname,
                          'service {} stop'.format(zk_service))
                raise error

        # Restore deployment-specific data.
        logging.info('Restoring deployment-specific data.')
        zk = kazoo.client.KazooClient(hosts=zk_hosts)
        zk.start()
        try:
            for zk_node in ZK_KEEP_PATHS:
                recursive_flush(zk, zk_node)
            deployment_data.seek(0)
            restore_zk(zk, deployment_data)
        finally:
            # Stop the client even if flushing or restoring fails.
            zk.stop()

        # Stop ZooKeeper on relevant machines.
        logging.info('Stopping ZooKeeper.')
        for zk_ip in zk_ips:
            zk_service = zk_service_names[zk_ip]
            utils.ssh(zk_ip, keyname, 'service {} stop'.format(zk_service))
            utils.ssh(zk_ip, keyname, 'rm -rf {}'.format(restore_file))
    finally:
        deployment_data.close()

    logging.info("Done with zk restore.")
    return True