Example #1
  def __init__(self, log_level=logging.INFO):
    """
    Constructor.
    """
    class_name = self.__class__.__name__
    self.logger = logging.getLogger(class_name)
    self.logger.setLevel(log_level)
    self.logger.info('Starting {}'.format(class_name))

    self.hosts = appscale_info.get_db_ips()
    self.retry_policy = IdempotentRetryPolicy()
    self.no_retries = FallthroughRetryPolicy()

    remaining_retries = INITIAL_CONNECT_RETRIES
    while True:
      try:
        self.cluster = Cluster(self.hosts,
                               default_retry_policy=self.retry_policy)
        self.session = self.cluster.connect(KEYSPACE)
        break
      except cassandra.cluster.NoHostAvailable as connection_error:
        remaining_retries -= 1
        if remaining_retries < 0:
          raise connection_error
        time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM
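The connect-and-retry loop in this constructor reappears nearly verbatim in Examples #4 through #6, #8, and #10 below. As a hedged sketch, the pattern could be factored into one helper; connect_with_retries is a hypothetical name, and INITIAL_CONNECT_RETRIES is the same constant the examples assume from their surrounding module:

import time

import cassandra.cluster
from cassandra.cluster import Cluster


def connect_with_retries(hosts, keyspace=None, **cluster_kwargs):
  """ Opens a Cassandra session, retrying while no host is reachable.

  Returns:
    A (cluster, session) tuple.
  """
  # INITIAL_CONNECT_RETRIES is assumed from the surrounding module.
  remaining_retries = INITIAL_CONNECT_RETRIES
  while True:
    try:
      cluster = Cluster(hosts, **cluster_kwargs)
      return cluster, cluster.connect(keyspace)
    except cassandra.cluster.NoHostAvailable:
      remaining_retries -= 1
      if remaining_retries < 0:
        raise
      time.sleep(3)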
Example #2
def restore_data(path, keyname, force=False):
    """ Restores the Cassandra backup.

  Args:
    path: A string containing the location on each of the DB machines to use
      for restoring data.
    keyname: A string containing the deployment's keyname.
  Raises:
    BRException if unable to find any Cassandra machines or if DB machine has
      insufficient space.
  """
    logging.info("Starting new db restore.")

    db_ips = appscale_info.get_db_ips()
    if not db_ips:
        raise BRException("Unable to find any Cassandra machines.")

    machines_without_restore = []
    for db_ip in db_ips:
        exit_code = utils.ssh(db_ip, keyname, "ls {}".format(path),
                              method=subprocess.call)
        if exit_code != ExitCodes.SUCCESS:
            machines_without_restore.append(db_ip)

    if machines_without_restore and not force:
        logging.info("The following machines do not have a restore file: {}".format(machines_without_restore))
        response = raw_input("Would you like to continue? [y/N] ")
        if response not in ["Y", "y"]:
            return

    for db_ip in db_ips:
        logging.info("Stopping Cassandra on {}".format(db_ip))
        summary = utils.ssh(db_ip, keyname, "monit summary",
                            method=subprocess.check_output)
        status = utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)
        retries = SERVICE_STOP_RETRIES
        while status != MonitStates.UNMONITORED:
            utils.ssh(db_ip, keyname,
                      "monit stop {}".format(CASSANDRA_MONIT_WATCH_NAME),
                      method=subprocess.call)
            time.sleep(3)
            summary = utils.ssh(db_ip, keyname, "monit summary",
                                method=subprocess.check_output)
            status = utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)
            retries -= 1
            if retries < 0:
                raise BRException("Unable to stop Cassandra")

    cassandra_dir = "{}/cassandra".format(APPSCALE_DATA_DIR)
    for db_ip in db_ips:
        logging.info("Restoring Cassandra data on {}".format(db_ip))
        clear_db = 'find {0} -regex ".*\.\(db\|txt\|log\)$" -exec rm {{}} \;'.\
          format(cassandra_dir)
        utils.ssh(db_ip, keyname, clear_db)

        if db_ip not in machines_without_restore:
            utils.ssh(db_ip, keyname, "tar xf {} -C {}".format(path, cassandra_dir))
            utils.ssh(db_ip, keyname, "chown -R cassandra {}".format(cassandra_dir))

        utils.ssh(db_ip, keyname, "monit start {}".format(CASSANDRA_MONIT_WATCH_NAME))

    logging.info("Done with db restore.")
Example #3
def backup_data(path, keyname):
    """ Backup Cassandra snapshot data directories/files.

  Args:
    path: A string containing the location to store the backup on each of the
      DB machines.
    keyname: A string containing the deployment's keyname.
  Raises:
    BRException if unable to find any Cassandra machines or if DB machine has
      insufficient space.
  """
    logging.info("Starting new db backup.")

    db_ips = appscale_info.get_db_ips()
    if not db_ips:
        raise BRException('Unable to find any Cassandra machines.')

    for db_ip in db_ips:
        utils.ssh(db_ip, keyname, '{} clearsnapshot'.format(NODE_TOOL))
        utils.ssh(db_ip, keyname, '{} snapshot'.format(NODE_TOOL))

        get_snapshot_size = 'find {0} -name "snapshots" -exec du -s {{}} \;'.\
          format(APPSCALE_DATA_DIR)
        du_output = utils.ssh(db_ip,
                              keyname,
                              get_snapshot_size,
                              method=subprocess.check_output)
        backup_size = sum(
            int(line.split()[0]) for line in du_output.split('\n') if line)

        output_dir = '/'.join(path.split('/')[:-1]) + '/'
        df_output = utils.ssh(db_ip,
                              keyname,
                              'df {}'.format(output_dir),
                              method=subprocess.check_output)
        available = int(df_output.split('\n')[1].split()[3])

        if backup_size > available * PADDING_PERCENTAGE:
            raise BRException('{} has insufficient space: {}/{}'.format(
                db_ip, available * PADDING_PERCENTAGE, backup_size))

    cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
    for db_ip in db_ips:
        create_tar = 'find . -regex ".*/snapshots/[0-9]*/.*" -exec tar '\
          '--transform="s/snapshots\/[0-9]*\///" -cf {0} {{}} +'.format(path)
        utils.ssh(db_ip, keyname, 'cd {} && {}'.format(cassandra_dir,
                                                       create_tar))

    logging.info("Done with db backup.")
Example #4
  def __init__(self):
    hosts = appscale_info.get_db_ips()

    remaining_retries = INITIAL_CONNECT_RETRIES
    while True:
      try:
        cluster = Cluster(hosts)
        self.session = cluster.connect(keyspace=KEYSPACE)
        break
      except cassandra.cluster.NoHostAvailable as connection_error:
        remaining_retries -= 1
        if remaining_retries < 0:
          raise connection_error
        time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM
Example #5
  def __init__(self):
    hosts = appscale_info.get_db_ips()

    remaining_retries = INITIAL_CONNECT_RETRIES
    while True:
      try:
        cluster = Cluster(hosts)
        self.session = cluster.connect(keyspace=KEYSPACE)
        break
      except cassandra.cluster.NoHostAvailable as connection_error:
        remaining_retries -= 1
        if remaining_retries < 0:
          raise connection_error
        time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM
    self.retry_policy = IdempotentRetryPolicy()
Example #6
    def __init__(self):
        hosts = appscale_info.get_db_ips()

        remaining_retries = INITIAL_CONNECT_RETRIES
        while True:
            try:
                # Cassandra 2.1 only supports up to Protocol Version 3.
                cluster = Cluster(hosts, protocol_version=3)
                self.session = cluster.connect(keyspace=KEYSPACE)
                break
            except cassandra.cluster.NoHostAvailable as connection_error:
                remaining_retries -= 1
                if remaining_retries < 0:
                    raise connection_error
                time.sleep(3)

        self.session.default_consistency_level = ConsistencyLevel.QUORUM
        self.retry_policy = IdempotentRetryPolicy()
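The only difference between Example #6 and Examples #4 and #5 is the protocol_version=3 pin. Without it, the driver tries its newest protocol first and, against a Cassandra 2.1 node, has to fail and downgrade before connecting, which at minimum logs warnings. A one-line sketch with a placeholder contact point:

from cassandra.cluster import Cluster

# 10.0.0.1 is a placeholder; Cassandra 2.1 speaks protocol v3 at most.
cluster = Cluster(['10.0.0.1'], protocol_version=3)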
Example #7
def backup_data(path, keyname):
  """ Backup Cassandra snapshot data directories/files.

  Args:
    path: A string containing the location to store the backup on each of the
      DB machines.
    keyname: A string containing the deployment's keyname.
  Raises:
    BRException if unable to find any Cassandra machines or if a DB machine
      has insufficient space.
  """
  logging.info("Starting new db backup.")

  db_ips = appscale_info.get_db_ips()
  if not db_ips:
    raise BRException('Unable to find any Cassandra machines.')

  for db_ip in db_ips:
    utils.ssh(db_ip, keyname, '{} clearsnapshot'.format(NODE_TOOL))
    utils.ssh(db_ip, keyname, '{} snapshot'.format(NODE_TOOL))

    get_snapshot_size = 'find {0} -name "snapshots" -exec du -s {{}} \;'.\
      format(APPSCALE_DATA_DIR)
    du_output = utils.ssh(db_ip, keyname, get_snapshot_size,
      method=subprocess.check_output)
    backup_size = sum(int(line.split()[0])
                      for line in du_output.split('\n') if line)

    output_dir = '/'.join(path.split('/')[:-1]) + '/'
    df_output = utils.ssh(db_ip, keyname, 'df {}'.format(output_dir),
      method=subprocess.check_output)
    available = int(df_output.split('\n')[1].split()[3])

    if backup_size > available * PADDING_PERCENTAGE:
      raise BRException('{} has insufficient space: {}/{}'.
        format(db_ip, available * PADDING_PERCENTAGE, backup_size))

  cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
  for db_ip in db_ips:
    create_tar = 'find . -regex ".*/snapshots/[0-9]*/.*" -exec tar '\
      '--transform="s/snapshots\/[0-9]*\///" -cf {0} {{}} +'.format(path)
    utils.ssh(db_ip, keyname, 'cd {} && {}'.format(cassandra_dir, create_tar))

  logging.info("Done with db backup.")
Example #8
def prime_cassandra(replication):
    """ Create Cassandra keyspace and initial tables.

  Args:
    replication: An integer specifying the replication factor for the keyspace.
  Raises:
    AppScaleBadArg if replication factor is not greater than 0.
    TypeError if replication is not an integer.
  """
    if not isinstance(replication, int):
        raise TypeError('Replication must be an integer')

    if int(replication) <= 0:
        raise dbconstants.AppScaleBadArg(
            'Replication must be greater than zero')

    hosts = appscale_info.get_db_ips()

    cluster = None
    session = None
    remaining_retries = INITIAL_CONNECT_RETRIES
    while True:
        try:
            cluster = Cluster(hosts)
            session = cluster.connect()
            break
        except cassandra.cluster.NoHostAvailable as connection_error:
            remaining_retries -= 1
            if remaining_retries < 0:
                raise connection_error
            time.sleep(3)
    session.default_consistency_level = ConsistencyLevel.QUORUM

    create_keyspace = """
    CREATE KEYSPACE IF NOT EXISTS "{keyspace}"
    WITH REPLICATION = %(replication)s
  """.format(keyspace=KEYSPACE)
    keyspace_replication = {
        'class': 'SimpleStrategy',
        'replication_factor': replication
    }
    session.execute(create_keyspace, {'replication': keyspace_replication})
    session.set_keyspace(KEYSPACE)

    for table in dbconstants.INITIAL_TABLES:
        create_table = """
      CREATE TABLE IF NOT EXISTS "{table}" (
        {key} blob,
        {column} text,
        {value} blob,
        PRIMARY KEY ({key}, {column})
      ) WITH COMPACT STORAGE
    """.format(table=table,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME,
               value=ThriftColumn.VALUE)
        statement = SimpleStatement(create_table, retry_policy=NO_RETRIES)

        logging.info('Trying to create {}'.format(table))
        try:
            session.execute(statement)
        except cassandra.OperationTimedOut:
            logging.warning(
                'Encountered an operation timeout while creating {} table. '
                'Waiting 1 minute for schema to settle.'.format(table))
            time.sleep(60)
            raise

    create_batch_tables(cluster, session)
    create_groups_table(session)
    create_transactions_table(session)
    create_pull_queue_tables(cluster, session)

    first_entity = session.execute('SELECT * FROM "{}" LIMIT 1'.format(
        dbconstants.APP_ENTITY_TABLE))
    existing_entities = len(list(first_entity)) == 1

    define_ua_schema(session)

    metadata_insert = """
    INSERT INTO "{table}" ({key}, {column}, {value})
    VALUES (%(key)s, %(column)s, %(value)s)
  """.format(table=dbconstants.DATASTORE_METADATA_TABLE,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME,
             value=ThriftColumn.VALUE)

    if not existing_entities:
        parameters = {
            'key': bytearray(cassandra_interface.VERSION_INFO_KEY),
            'column': cassandra_interface.VERSION_INFO_KEY,
            'value': bytearray(str(POST_JOURNAL_VERSION))
        }
        session.execute(metadata_insert, parameters)

        # Mark the newly created indexes as clean.
        parameters = {
            'key': bytearray(cassandra_interface.INDEX_STATE_KEY),
            'column': cassandra_interface.INDEX_STATE_KEY,
            'value': bytearray(str(IndexStates.CLEAN))
        }
        session.execute(metadata_insert, parameters)

    # Indicate that the database has been successfully primed.
    parameters = {
        'key': bytearray(cassandra_interface.PRIMED_KEY),
        'column': cassandra_interface.PRIMED_KEY,
        'value': bytearray('true')
    }
    session.execute(metadata_insert, parameters)
    logging.info('Cassandra is primed.')
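prime_cassandra relies on the driver's named-parameter style throughout: %(name)s placeholders are filled from a dict at execute time, and a Python dict value is encoded as a CQL map literal, which is how the replication settings reach CREATE KEYSPACE without any string building. A minimal standalone sketch; the contact point and "demo" keyspace are placeholders:

from cassandra.cluster import Cluster

cluster = Cluster(['127.0.0.1'])  # placeholder contact point
session = cluster.connect()
session.execute(
  'CREATE KEYSPACE IF NOT EXISTS "demo" WITH REPLICATION = %(replication)s',
  {'replication': {'class': 'SimpleStrategy', 'replication_factor': 1}})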
Example #9
def restore_data(path, keyname, force=False):
    """ Restores the Cassandra backup.

  Args:
    path: A string containing the location on each of the DB machines to use
      for restoring data.
    keyname: A string containing the deployment's keyname.
  Raises:
    BRException if unable to find any Cassandra machines or if DB machine has
      insufficient space.
  """
    logging.info("Starting new db restore.")

    db_ips = appscale_info.get_db_ips()
    if not db_ips:
        raise BRException('Unable to find any Cassandra machines.')

    machines_without_restore = []
    for db_ip in db_ips:
        exit_code = utils.ssh(db_ip,
                              keyname,
                              'ls {}'.format(path),
                              method=subprocess.call)
        if exit_code != ExitCodes.SUCCESS:
            machines_without_restore.append(db_ip)

    if machines_without_restore and not force:
        logging.info(
            'The following machines do not have a restore file: {}'.format(
                machines_without_restore))
        response = raw_input('Would you like to continue? [y/N] ')
        if response not in ['Y', 'y']:
            return

    for db_ip in db_ips:
        logging.info('Stopping Cassandra on {}'.format(db_ip))
        summary = utils.ssh(db_ip,
                            keyname,
                            'monit summary',
                            method=subprocess.check_output)
        status = utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)
        retries = SERVICE_STOP_RETRIES
        while status != MonitStates.UNMONITORED:
            utils.ssh(db_ip, keyname,
                      'monit stop {}'.format(CASSANDRA_MONIT_WATCH_NAME))
            time.sleep(1)
            summary = utils.ssh(db_ip,
                                keyname,
                                'monit summary',
                                method=subprocess.check_output)
            status = utils.monit_status(summary, CASSANDRA_MONIT_WATCH_NAME)
            retries -= 1
            if retries < 0:
                raise BRException('Unable to stop Cassandra')

    cassandra_dir = '{}/cassandra'.format(APPSCALE_DATA_DIR)
    for db_ip in db_ips:
        logging.info('Restoring Cassandra data on {}'.format(db_ip))
        clear_db = 'find {0} -regex ".*\.\(db\|txt\|log\)$" -exec rm {{}} \;'.\
          format(cassandra_dir)
        utils.ssh(db_ip, keyname, clear_db)

        if db_ip not in machines_without_restore:
            utils.ssh(db_ip, keyname,
                      'tar xf {} -C {}'.format(path, cassandra_dir))

        utils.ssh(db_ip, keyname,
                  'monit start {}'.format(CASSANDRA_MONIT_WATCH_NAME))

    logging.info("Done with db restore.")
Example #10
def prime_cassandra(replication):
  """ Create Cassandra keyspace and initial tables.

  Args:
    replication: An integer specifying the replication factor for the keyspace.
  Raises:
    AppScaleBadArg if replication factor is not greater than 0.
    TypeError if replication is not an integer.
  """
  if not isinstance(replication, int):
    raise TypeError('Replication must be an integer')

  if int(replication) <= 0:
    raise dbconstants.AppScaleBadArg('Replication must be greater than zero')

  hosts = appscale_info.get_db_ips()

  cluster = None
  session = None
  remaining_retries = INITIAL_CONNECT_RETRIES
  while True:
    try:
      cluster = Cluster(hosts)
      session = cluster.connect()
      break
    except cassandra.cluster.NoHostAvailable as connection_error:
      remaining_retries -= 1
      if remaining_retries < 0:
        raise connection_error
      time.sleep(3)
  session.default_consistency_level = ConsistencyLevel.QUORUM

  create_keyspace = """
    CREATE KEYSPACE IF NOT EXISTS "{keyspace}"
    WITH REPLICATION = %(replication)s
  """.format(keyspace=KEYSPACE)
  keyspace_replication = {'class': 'SimpleStrategy',
                          'replication_factor': replication}
  session.execute(create_keyspace, {'replication': keyspace_replication})
  session.set_keyspace(KEYSPACE)

  for table in dbconstants.INITIAL_TABLES:
    create_table = """
      CREATE TABLE IF NOT EXISTS "{table}" (
        {key} blob,
        {column} text,
        {value} blob,
        PRIMARY KEY ({key}, {column})
      ) WITH COMPACT STORAGE
    """.format(table=table,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME,
               value=ThriftColumn.VALUE)
    statement = SimpleStatement(create_table, retry_policy=NO_RETRIES)

    logging.info('Trying to create {}'.format(table))
    try:
      session.execute(statement)
    except cassandra.OperationTimedOut:
      logging.warning(
        'Encountered an operation timeout while creating {} table. '
        'Waiting 1 minute for schema to settle.'.format(table))
      time.sleep(60)
      raise

  create_batch_tables(cluster, session)
  create_pull_queue_tables(cluster, session)

  first_entity = session.execute(
    'SELECT * FROM "{}" LIMIT 1'.format(dbconstants.APP_ENTITY_TABLE))
  existing_entities = len(list(first_entity)) == 1

  define_ua_schema(session)

  metadata_insert = """
    INSERT INTO "{table}" ({key}, {column}, {value})
    VALUES (%(key)s, %(column)s, %(value)s)
  """.format(
    table=dbconstants.DATASTORE_METADATA_TABLE,
    key=ThriftColumn.KEY,
    column=ThriftColumn.COLUMN_NAME,
    value=ThriftColumn.VALUE
  )

  if not existing_entities:
    parameters = {'key': bytearray(cassandra_interface.VERSION_INFO_KEY),
                  'column': cassandra_interface.VERSION_INFO_KEY,
                  'value': bytearray(str(POST_JOURNAL_VERSION))}
    session.execute(metadata_insert, parameters)

  # Indicate that the database has been successfully primed.
  parameters = {'key': bytearray(cassandra_interface.PRIMED_KEY),
                'column': cassandra_interface.PRIMED_KEY,
                'value': bytearray('true')}
  session.execute(metadata_insert, parameters)
  logging.info('Cassandra is primed.')
Example #11
def get_kind_averages(keys):
    """ Get an average size for each kind.

  Args:
    keys: A list of dictionaries containing keys.
  Returns:
    A dictionary listing the average size of each kind.
  """
    hosts = appscale_info.get_db_ips()
    cluster = Cluster(hosts, default_retry_policy=BASIC_RETRIES)
    session = cluster.connect(KEYSPACE)

    entities_by_kind = {}
    for key_dict in keys:
        key = key_dict['key']
        if is_entity(key):
            key_parts = key.split(KEY_DELIMITER)
            kind = key_parts[2].split(':')[0]
            kind_id = KEY_DELIMITER.join([key_parts[0], key_parts[1], kind])
            if kind_id not in entities_by_kind:
                entities_by_kind[kind_id] = {
                    'keys': [],
                    'size': 0,
                    'fetched': 0
                }
            entities_by_kind[kind_id]['keys'].append(key)

    for kind_id, kind in entities_by_kind.iteritems():
        shuffle(kind['keys'])

    if not entities_by_kind:
        return {}

    futures = []
    for _ in range(50):
        kind = choice(entities_by_kind.keys())
        try:
            key = entities_by_kind[kind]['keys'].pop()
        except IndexError:
            continue

        select = """
      SELECT {value} FROM "{table}"
      WHERE {key}=%(key)s AND {column}=%(column)s
    """.format(value=ThriftColumn.VALUE,
               table=APP_ENTITY_TABLE,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME)
        parameters = {'key': bytearray(key), 'column': APP_ENTITY_SCHEMA[0]}
        future = session.execute_async(select, parameters)
        futures.append({'future': future, 'kind': kind})

    for future_dict in futures:
        future = future_dict['future']
        kind = future_dict['kind']
        try:
            entity = future.result()[0].value
        except IndexError:
            continue

        entities_by_kind[kind]['size'] += len(entity)
        entities_by_kind[kind]['fetched'] += 1

    kind_averages = {}
    for kind_id, kind in entities_by_kind.iteritems():
        try:
            kind_averages[kind_id] = int(kind['size'] / kind['fetched'])
        except ZeroDivisionError:
            kind_averages[kind_id] = 0

    return kind_averages
Example #12
def get_kind_averages(keys):
  """ Get an average size for each kind.

  Args:
    keys: A list of dictionaries containing keys.
  Returns:
    A dictionary listing the average size of each kind.
  """
  hosts = appscale_info.get_db_ips()
  retry_policy = IdempotentRetryPolicy()
  cluster = Cluster(hosts, default_retry_policy=retry_policy)
  session = cluster.connect(KEYSPACE)

  entities_by_kind = {}
  for key_dict in keys:
    key = key_dict['key']
    if is_entity(key):
      key_parts = key.split(KEY_DELIMITER)
      kind = key_parts[2].split(':')[0]
      kind_id = KEY_DELIMITER.join([key_parts[0], key_parts[1], kind])
      if kind_id not in entities_by_kind:
        entities_by_kind[kind_id] = {'keys': [], 'size': 0, 'fetched': 0}
      entities_by_kind[kind_id]['keys'].append(key)

  for kind_id, kind in entities_by_kind.iteritems():
    shuffle(kind['keys'])

  if not entities_by_kind:
    return {}

  futures = []
  for _ in range(50):
    kind = choice(entities_by_kind.keys())
    try:
      key = entities_by_kind[kind]['keys'].pop()
    except IndexError:
      continue

    select = """
      SELECT {value} FROM "{table}"
      WHERE {key}=%(key)s AND {column}=%(column)s
    """.format(value=ThriftColumn.VALUE, table=APP_ENTITY_TABLE,
               key=ThriftColumn.KEY, column=ThriftColumn.COLUMN_NAME)
    parameters = {'key': bytearray(key), 'column': APP_ENTITY_SCHEMA[0]}
    future = session.execute_async(select, parameters)
    futures.append({'future': future, 'kind': kind})

  for future_dict in futures:
    future = future_dict['future']
    kind = future_dict['kind']
    try:
      entity = future.result()[0].value
    except IndexError:
      continue

    entities_by_kind[kind]['size'] += len(entity)
    entities_by_kind[kind]['fetched'] += 1

  kind_averages = {}
  for kind_id, kind in entities_by_kind.iteritems():
    try:
      kind_averages[kind_id] = int(kind['size'] / kind['fetched'])
    except ZeroDivisionError:
      kind_averages[kind_id] = 0

  return kind_averages
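A portability footnote on Examples #11 and #12: this is Python 2 code (dict.iteritems, and random.choice over dict.keys()). In Python 3, dict views are not sequences, so the sampling line would need the keys materialized first:

from random import choice

# Works on Python 2 and 3 alike; a bare dict.keys() is not indexable on 3.
kind = choice(list(entities_by_kind))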