Example No. 1
  def __init__(self, project_id, db_access):
    """ Creates a new BatchResolver.

    Args:
      project_id: A string specifying a project ID.
      db_access: A DatastoreProxy.
    """
    self.project_id = project_id

    self._db_access = db_access
    self._session = self._db_access.session
    self._tornado_cassandra = TornadoCassandra(self._session)
    self._prepared_statements = {}
Example No. 2
  def __init__(self, session, project, scattered=False):
    """ Creates a new EntityIDAllocator object.

    Args:
      session: A cassandra-driver session object.
      project: A string specifying a project ID.
      scattered: A boolean specifying whether to use the scattered ID space.
    """
    self.project = project
    self.session = session
    self.tornado_cassandra = TornadoCassandra(self.session)
    self.scattered = scattered
    if scattered:
      self.max_allowed = _MAX_SCATTERED_COUNTER
    else:
      self.max_allowed = _MAX_SEQUENTIAL_COUNTER

    # Allows the allocator to avoid making unnecessary Cassandra requests when
    # setting the minimum counter value.
    self._last_reserved_cache = None
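
The constructor only stores references; obtaining a usable session is the caller's job. A minimal construction sketch, assuming a reachable node address and keyspace name (both invented here):

# Hypothetical setup: the allocator needs a live cassandra-driver session.
from cassandra.cluster import Cluster

cluster = Cluster(['10.0.0.1'])
session = cluster.connect('appscale')  # keyspace name is an assumption
allocator = EntityIDAllocator(session, 'guestbook', scattered=True)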
Example No. 3
  def __init__(self):
    hosts = appscale_info.get_db_ips()

    remaining_retries = INITIAL_CONNECT_RETRIES
    while True:
      try:
        cluster = Cluster(hosts, load_balancing_policy=LB_POLICY)
        self.session = cluster.connect(keyspace=KEYSPACE)
        self.tornado_cassandra = TornadoCassandra(self.session)
        break
      except cassandra.cluster.NoHostAvailable as connection_error:
        remaining_retries -= 1
        if remaining_retries < 0:
          raise connection_error
        time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM

    # Provide synchronous version of get_schema method
    self.get_schema_sync = tornado_synchronous(self.get_schema)
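
tornado_synchronous is an AppScale helper that these listings do not include. A plausible sketch of such a wrapper, assuming standard Tornado APIs (an illustration, not the project's actual implementation):

from tornado.ioloop import IOLoop

def tornado_synchronous(coroutine):
  """ Wraps a coroutine so it can be called as a blocking function. """
  def wrapper(*args, **kwargs):
    # run_sync spins the IOLoop until the coroutine resolves and returns
    # its result.
    return IOLoop.current().run_sync(lambda: coroutine(*args, **kwargs))
  return wrapper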
Example No. 4
  def __init__(self, log_level=logging.INFO, hosts=None):
    """
    Constructor.
    """
    class_name = self.__class__.__name__
    self.logger = logging.getLogger(class_name)
    self.logger.setLevel(log_level)
    self.logger.info('Starting {}'.format(class_name))

    if hosts is not None:
      self.hosts = hosts
    else:
      self.hosts = appscale_info.get_db_ips()

    remaining_retries = INITIAL_CONNECT_RETRIES
    while True:
      try:
        self.cluster = Cluster(self.hosts, default_retry_policy=BASIC_RETRIES,
                               load_balancing_policy=LB_POLICY)
        self.session = self.cluster.connect(KEYSPACE)
        self.tornado_cassandra = TornadoCassandra(self.session)
        break
      except cassandra.cluster.NoHostAvailable as connection_error:
        remaining_retries -= 1
        if remaining_retries < 0:
          raise connection_error
        time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM
    self.prepared_statements = {}

    # Provide synchronous version of some async methods
    self.batch_get_entity_sync = tornado_synchronous(self.batch_get_entity)
    self.batch_put_entity_sync = tornado_synchronous(self.batch_put_entity)
    self.batch_delete_sync = tornado_synchronous(self.batch_delete)
    self.valid_data_version_sync = tornado_synchronous(self.valid_data_version)
    self.range_query_sync = tornado_synchronous(self.range_query)
    self.get_metadata_sync = tornado_synchronous(self.get_metadata)
    self.set_metadata_sync = tornado_synchronous(self.set_metadata)
    self.get_indices_sync = tornado_synchronous(self.get_indices)
    self.delete_table_sync = tornado_synchronous(self.delete_table)
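
Because the constructor publishes *_sync wrappers, code that is not already running on the IOLoop can call the blocking variants directly. A hypothetical call site (the table and column names are invented):

import logging

db = DatastoreProxy(log_level=logging.DEBUG)
# Blocks until the underlying batch_get_entity coroutine completes.
rows = db.batch_get_entity_sync('entities', ['row-key-1'], ['reference'])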
Example No. 5
  def __init__(self, session, project, txid):
    """ Create a new LargeBatch object.

    Args:
      session: A cassandra-driver session.
      project: A string specifying a project ID.
      txid: An integer specifying a transaction ID.
    """
    self.session = session
    self.tornado_cassandra = TornadoCassandra(self.session)
    self.project = project
    self.txid = txid

    # Create an identifier so that it's possible to check if operations succeed
    # after a timeout.
    self.op_id = uuid.uuid4()

    # This value is used when claiming an existing failed batch.
    self.read_op_id = None

    # Indicates if the batch has been applied.
    self.applied = False
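
The batch-status queries in the later examples key their rows by tx_partition(project, txid), a helper that is not included in these listings. One plausible sketch that is consistent with its use as a blob partition key (purely an assumption, not the project's actual implementation):

import hashlib
import struct

def tx_partition(project, txid):
  # Hypothetical: hash the project ID and transaction ID into a fixed-width
  # blob suitable for a Cassandra partition key.
  return bytearray(hashlib.md5(project + struct.pack('q', txid)).digest())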
Example No. 6
class LargeBatch(object):
  def __init__(self, session, project, txid):
    """ Create a new LargeBatch object.

    Args:
      session: A cassandra-driver session.
      project: A string specifying a project ID.
      txid: An integer specifying a transaction ID.
    """
    self.session = session
    self.tornado_cassandra = TornadoCassandra(self.session)
    self.project = project
    self.txid = txid

    # Create an identifier so that it's possible to check if operations succeed
    # after a timeout.
    self.op_id = uuid.uuid4()

    # This value is used when claiming an existing failed batch.
    self.read_op_id = None

    # Indicates if the batch has been applied.
    self.applied = False

  @gen.coroutine
  def is_applied(self, retries=5):
    """ Fetch the status of the batch.

    Args:
      retries: The number of times to retry after failures.
    Returns:
      A boolean indicating whether or not the batch has been applied.
    Raises:
      BatchNotFound if the batch cannot be found.
      BatchNotOwned if a different process owns the batch.
    """
    if self.applied:
      raise gen.Return(True)

    get_status = """
      SELECT applied, op_id FROM batch_status
      WHERE txid_hash = %(txid_hash)s
    """
    query = SimpleStatement(get_status, retry_policy=BASIC_RETRIES,
                            consistency_level=ConsistencyLevel.SERIAL)
    parameters = {'txid_hash': tx_partition(self.project, self.txid)}

    try:
      results = yield self.tornado_cassandra.execute(
        query, parameters=parameters)
      result = results[0]
      if result.op_id != self.op_id:
        self.read_op_id = result.op_id
        raise BatchNotOwned(
          '{} does not match {}'.format(self.op_id, result.op_id))
      raise gen.Return(result.applied)
    except TRANSIENT_CASSANDRA_ERRORS:
      retries_left = retries - 1
      if retries_left < 0:
        raise

      logger.debug('Unable to read batch status. Retrying.')
      is_applied = yield self.is_applied(retries=retries_left)
      raise gen.Return(is_applied)
    except IndexError:
      raise BatchNotFound(
        'Batch for {}:{} not found'.format(self.project, self.txid))

  @gen.coroutine
  def start(self, retries=5):
    """ Mark the batch as being in progress.

    Args:
      retries: The number of times to retry after failures.
    Raises:
      FailedBatch if the batch cannot be marked as being started.
    """
    if retries < 0:
      raise FailedBatch('Retries exhausted while starting batch')

    insert = SimpleStatement("""
      INSERT INTO batch_status (txid_hash, applied, op_id)
      VALUES (%(txid_hash)s, False, %(op_id)s)
      IF NOT EXISTS
    """, retry_policy=NO_RETRIES)
    parameters = {'txid_hash': tx_partition(self.project, self.txid),
                  'op_id': self.op_id}

    try:
      result = yield self.tornado_cassandra.execute(insert, parameters)
    except TRANSIENT_CASSANDRA_ERRORS:
      yield self.start(retries=retries-1)
      return

    if result.was_applied:
      return

    # Make sure this process was responsible for the insert.
    try:
      yield self.is_applied()
    except (BatchNotOwned, TRANSIENT_CASSANDRA_ERRORS) as batch_failure:
      raise FailedBatch(str(batch_failure))
    except BatchNotFound:
      yield self.start(retries=retries-1)
      return

  @gen.coroutine
  def set_applied(self, retries=5):
    """ Mark the batch as being applied.

    Args:
      retries: The number of times to retry after failures.
    Raises:
      FailedBatch if the batch cannot be marked as applied.
    """
    if retries < 0:
      raise FailedBatch('Retries exhausted while updating batch')

    update_status = SimpleStatement("""
      UPDATE batch_status
      SET applied = True
      WHERE txid_hash = %(txid_hash)s
      IF op_id = %(op_id)s
    """, retry_policy=NO_RETRIES)
    parameters = {'txid_hash': tx_partition(self.project, self.txid),
                  'op_id': self.op_id}

    try:
      result = yield self.tornado_cassandra.execute(update_status, parameters)
      if result.was_applied:
        self.applied = True
        return
    except TRANSIENT_CASSANDRA_ERRORS:
      pass  # Application is confirmed below.

    try:
      self.applied = yield self.is_applied()
      if self.applied:
        return
      yield self.set_applied(retries=retries-1)
      return
    except (BatchNotFound, BatchNotOwned, TRANSIENT_CASSANDRA_ERRORS) as error:
      raise FailedBatch(str(error))

  @gen.coroutine
  def cleanup(self, retries=5):
    """ Clean up the batch status entry.

    Args:
      retries: The number of times to retry after failures.
    Raises:
      FailedBatch if the batch status entry cannot be removed.
    """
    if retries < 0:
      raise FailedBatch('Retries exhausted while cleaning up batch')

    clear_status = SimpleStatement("""
      DELETE FROM batch_status
      WHERE txid_hash = %(txid_hash)s
      IF op_id = %(op_id)s
    """, retry_policy=NO_RETRIES)
    parameters = {'txid_hash': tx_partition(self.project, self.txid),
                  'op_id': self.op_id}

    try:
      result = yield self.tornado_cassandra.execute(clear_status, parameters)
    except TRANSIENT_CASSANDRA_ERRORS:
      yield self.cleanup(retries=retries-1)
      return

    if not result.was_applied:
      raise FailedBatch(
        'Unable to clean up batch for {}:{}'.format(self.project, self.txid))
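
A hedged sketch of the intended lifecycle, driven from a coroutine; the project ID and transaction ID are placeholders:

from tornado import gen

@gen.coroutine
def commit_with_batch_log(session):
  batch = LargeBatch(session, 'guestbook', txid=42)
  yield batch.start()        # claim the batch entry with a lightweight transaction
  # ... write the batch log and apply the mutations here ...
  yield batch.set_applied()  # mark the batch as committed
  yield batch.cleanup()      # remove the status entry once finished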
Example No. 7
class BatchResolver(object):
  """ Resolves large batches. """
  def __init__(self, project_id, db_access):
    """ Creates a new BatchResolver.

    Args:
      project_id: A string specifying a project ID.
      db_access: A DatastoreProxy.
    """
    self.project_id = project_id

    self._db_access = db_access
    self._session = self._db_access.session
    self._tornado_cassandra = TornadoCassandra(self._session)
    self._prepared_statements = {}

  @gen.coroutine
  def resolve(self, txid, composite_indexes):
    """ Resolves a large batch for a given transaction.

    Args:
      txid: An integer specifying a transaction ID.
      composite_indexes: A list of CompositeIndex objects.
    """
    txid_hash = tx_partition(self.project_id, txid)
    new_op_id = uuid.uuid4()
    try:
      batch_status = yield self._get_status(txid_hash)
    except BatchNotFound:
      # Make sure another process doesn't try to commit the transaction.
      yield self._insert(txid_hash, new_op_id)
      return

    old_op_id = batch_status.op_id
    yield self._update_op_id(txid_hash, batch_status.applied, old_op_id,
                             new_op_id)

    if batch_status.applied:
      # Make sure all the mutations in the batch have been applied.
      yield self._apply_mutations(txid, composite_indexes)

  @gen.coroutine
  def cleanup(self, txid):
    """ Cleans up the metadata from the finished batch.

    Args:
      txid: An integer specifying a transaction ID.
    """
    txid_hash = tx_partition(self.project_id, txid)
    yield self._delete_mutations(txid)
    yield self._delete_status(txid_hash)

  def _get_prepared(self, statement):
    """ Caches prepared statements.

    Args:
      statement: A string containing a Cassandra statement.
    """
    if statement not in self._prepared_statements:
      self._prepared_statements[statement] = self._session.prepare(statement)

    return self._prepared_statements[statement]

  @gen.coroutine
  def _get_status(self, txid_hash):
    """ Gets the current status of a large batch.

    Args:
      txid_hash: A byte array identifying the transaction.
    Returns:
      A Cassandra result for the batch entry.
    """
    statement = self._get_prepared("""
      SELECT applied, op_id FROM batch_status
      WHERE txid_hash = ?
    """)
    bound_statement = statement.bind((txid_hash,))
    bound_statement.consistency_level = ConsistencyLevel.SERIAL
    bound_statement.retry_policy = BASIC_RETRIES
    results = yield self._tornado_cassandra.execute(bound_statement)
    try:
      raise gen.Return(results[0])
    except IndexError:
      raise BatchNotFound('Batch not found')

  @gen.coroutine
  def _insert(self, txid_hash, op_id):
    """ Claims the large batch.

    Args:
      txid_hash: A byte array identifying the transaction.
      op_id: A uuid4 specifying the process ID.
    """
    statement = self._get_prepared("""
      INSERT INTO batch_status (txid_hash, applied, op_id)
      VALUES (?, ?, ?)
      IF NOT EXISTS
    """)
    bound_statement = statement.bind((txid_hash, False, op_id))
    bound_statement.retry_policy = NO_RETRIES
    results = yield self._tornado_cassandra.execute(bound_statement)
    if not results[0].applied:
      raise BatchNotOwned('Another process started applying the transaction')

  @gen.coroutine
  def _select_mutations(self, txid):
    """ Fetches a list of the mutations for the batch.

    Args:
      txid: An integer specifying a transaction ID.
    Returns:
      An iterator of Cassandra results.
    """
    statement = self._get_prepared("""
      SELECT old_value, new_value FROM batches
      WHERE app = ? AND transaction = ?
    """)
    bound_statement = statement.bind((self.project_id, txid))
    bound_statement.retry_policy = BASIC_RETRIES
    results = yield self._tornado_cassandra.execute(bound_statement)
    raise gen.Return(results)

  @gen.coroutine
  def _apply_mutations(self, txid, composite_indexes):
    """ Applies all the mutations in the batch.

    Args:
      txid: An integer specifying a transaction ID.
      composite_indexes: A list of CompositeIndex objects.
    """
    results = yield self._select_mutations(txid)
    futures = []
    for result in results:
      old_entity = result.old_value
      if old_entity is not None:
        old_entity = entity_pb.EntityProto(old_entity)

      new_entity = result.new_value

      if new_entity is None:
        mutations = deletions_for_entity(old_entity, composite_indexes)
      else:
        new_entity = entity_pb.EntityProto(new_entity)
        mutations = mutations_for_entity(new_entity, txid, old_entity,
                                         composite_indexes)

      statements_and_params = self._db_access.statements_for_mutations(
        mutations, txid)
      for statement, params in statements_and_params:
        futures.append(self._tornado_cassandra.execute(statement, params))

    yield futures

  @gen.coroutine
  def _update_op_id(self, txid_hash, applied_status, old_op_id, new_op_id):
    """ Claims a batch that is in progress.

    Args:
      txid_hash: A byte array identifying the transaction.
      applied_status: A boolean indicating that the batch has been committed.
      old_op_id: A uuid4 specifying the last read process ID.
      new_op_id: A uuid4 specifying the new process ID.
    """
    statement = self._get_prepared("""
      UPDATE batch_status
      SET op_id = ?
      WHERE txid_hash = ?
      IF op_id = ?
      AND applied = ?
    """)
    params = (new_op_id, txid_hash, old_op_id, applied_status)
    bound_statement = statement.bind(params)
    bound_statement.retry_policy = NO_RETRIES
    results = yield self._tornado_cassandra.execute(bound_statement)
    if not results[0].applied:
      raise BatchNotOwned('Batch status changed after checking')

  @gen.coroutine
  def _delete_mutations(self, txid):
    """ Removes mutation entries for the batch.

    Args:
      txid: An integer specifying a transaction ID.
    """
    statement = self._get_prepared("""
      DELETE FROM batches
      WHERE app = ? AND transaction = ?
    """)
    params = (self.project_id, txid)
    bound_statement = statement.bind(params)
    bound_statement.retry_policy = BASIC_RETRIES
    yield self._tornado_cassandra.execute(bound_statement)

  @gen.coroutine
  def _delete_status(self, txid_hash):
    """ Removes the batch status entry.

    Args:
      txid_hash: A byte array identifying a transaction.
    """
    statement = self._get_prepared("""
      DELETE FROM batch_status
      WHERE txid_hash = ?
      IF EXISTS
    """)
    bound_statement = statement.bind((txid_hash,))
    bound_statement.retry_policy = NO_RETRIES
    yield self._tornado_cassandra.execute(bound_statement)
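
A usage sketch for the resolver, assuming a connected DatastoreProxy and the ID of a stalled transaction; the project ID is a placeholder:

from tornado import gen

@gen.coroutine
def resolve_stalled_batch(db_access, txid):
  resolver = BatchResolver('guestbook', db_access)
  # Either claims an unstarted batch or finishes applying a committed one.
  yield resolver.resolve(txid, composite_indexes=[])
  yield resolver.cleanup(txid)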
Example No. 8
class EntityIDAllocator(object):
  """ Keeps track of reserved entity IDs for a project. """

  def __init__(self, session, project, scattered=False):
    """ Creates a new EntityIDAllocator object.

    Args:
      session: A cassandra-driver session object.
      project: A string specifying a project ID.
      scattered: A boolean specifying whether to use the scattered ID space.
    """
    self.project = project
    self.session = session
    self.tornado_cassandra = TornadoCassandra(self.session)
    self.scattered = scattered
    if scattered:
      self.max_allowed = _MAX_SCATTERED_COUNTER
    else:
      self.max_allowed = _MAX_SEQUENTIAL_COUNTER

    # Allows the allocator to avoid making unnecessary Cassandra requests when
    # setting the minimum counter value.
    self._last_reserved_cache = None

  @gen.coroutine
  def _ensure_entry(self, retries=5):
    """ Ensures an entry exists for a reservation.

    Args:
      retries: The number of times to retry the insert.
    Raises:
      AppScaleDBConnectionError if the insert is tried too many times.
    """
    if retries < 0:
      raise AppScaleDBConnectionError('Unable to create reserved_ids entry')

    logger.debug('Creating reserved_ids entry for {}'.format(self.project))
    insert = SimpleStatement("""
      INSERT INTO reserved_ids (project, scattered, last_reserved, op_id)
      VALUES (%(project)s, %(scattered)s, 0, uuid())
      IF NOT EXISTS
    """, retry_policy=NO_RETRIES)
    parameters = {'project': self.project, 'scattered': self.scattered}
    try:
      yield self.tornado_cassandra.execute(insert, parameters)
    except TRANSIENT_CASSANDRA_ERRORS:
      yield self._ensure_entry(retries=retries-1)

  @gen.coroutine
  def _get_last_reserved(self):
    """ Retrieves the last entity ID that was reserved.

    Returns:
      An integer specifying an entity ID.
    """
    get_reserved = SimpleStatement("""
      SELECT last_reserved
      FROM reserved_ids
      WHERE project = %(project)s
      AND scattered = %(scattered)s
    """, consistency_level=ConsistencyLevel.SERIAL)
    parameters = {'project': self.project, 'scattered': self.scattered}
    try:
      results = yield self.tornado_cassandra.execute(get_reserved, parameters)
      result = results[0]
    except IndexError:
      yield self._ensure_entry()
      last_reserved = yield self._get_last_reserved()
      raise gen.Return(last_reserved)

    self._last_reserved_cache = result.last_reserved
    raise gen.Return(result.last_reserved)

  @gen.coroutine
  def _get_last_op_id(self):
    """ Retrieve the op_id that was last written during a reservation.

    Returns:
      A UUID4 containing the latest op_id.
    """
    get_op_id = SimpleStatement("""
      SELECT op_id
      FROM reserved_ids
      WHERE project = %(project)s
      AND scattered = %(scattered)s
    """, consistency_level=ConsistencyLevel.SERIAL)
    parameters = {'project': self.project, 'scattered': self.scattered}
    results = yield self.tornado_cassandra.execute(get_op_id, parameters)
    raise gen.Return(results[0].op_id)

  @gen.coroutine
  def _set_reserved(self, last_reserved, new_reserved):
    """ Update the last reserved value to allocate that block.

    Args:
      last_reserved: An integer specifying the last reserved value.
      new_reserved: An integer specifying the new reserved value.
    Raises:
      ReservationFailed if the update statement fails.
    """
    op_id = uuid.uuid4()
    set_reserved = SimpleStatement("""
      UPDATE reserved_ids
      SET last_reserved = %(new_reserved)s,
          op_id = %(op_id)s
      WHERE project = %(project)s
      AND scattered = %(scattered)s
      IF last_reserved = %(last_reserved)s
    """, retry_policy=NO_RETRIES)
    parameters = {
      'last_reserved': last_reserved, 'new_reserved': new_reserved,
      'project': self.project, 'scattered': self.scattered, 'op_id': op_id}
    try:
      result = yield self.tornado_cassandra.execute(set_reserved, parameters)
    except TRANSIENT_CASSANDRA_ERRORS as error:
      last_op_id = yield self._get_last_op_id()
      if last_op_id == op_id:
        return
      raise ReservationFailed(str(error))

    if not result.was_applied:
      raise ReservationFailed('Last reserved value changed')

    self._last_reserved_cache = new_reserved

  @gen.coroutine
  def allocate_size(self, size, retries=5, min_counter=None):
    """ Reserve a block of IDs for this project.

    Args:
      size: The number of IDs to reserve.
      retries: The number of times to retry the reservation.
      min_counter: The minimum counter value that should be reserved.
    Returns:
      A tuple of integers specifying the start and end ID.
    Raises:
      AppScaleDBConnectionError if the reservation is tried too many times.
      AppScaleBadArg if the ID space has been exhausted.
    """
    if retries < 0:
      raise AppScaleDBConnectionError('Unable to reserve new block')

    try:
      last_reserved = yield self._get_last_reserved()
    except TRANSIENT_CASSANDRA_ERRORS:
      raise AppScaleDBConnectionError('Unable to get last reserved ID')

    if min_counter is None:
      new_reserved = last_reserved + size
    else:
      new_reserved = max(last_reserved, min_counter) + size

    if new_reserved > self.max_allowed:
      raise AppScaleBadArg('Exceeded maximum allocated IDs')

    try:
      yield self._set_reserved(last_reserved, new_reserved)
    except ReservationFailed:
      start_id, end_id = yield self.allocate_size(size, retries=retries-1)
      raise gen.Return((start_id, end_id))

    start_id = last_reserved + 1
    end_id = new_reserved
    raise gen.Return((start_id, end_id))

  @gen.coroutine
  def allocate_max(self, max_id, retries=5):
    """ Reserves all IDs up to the one given.

    Args:
      max_id: An integer specifying the maximum ID to allocate.
      retries: The number of times to retry the reservation.
    Returns:
      A tuple of integers specifying the start and end ID.
    Raises:
      AppScaleDBConnectionError if the reservation is tried too many times.
      AppScaleBadArg if the ID space has been exhausted.
    """
    if retries < 0:
      raise AppScaleDBConnectionError('Unable to reserve new block')

    if max_id > self.max_allowed:
      raise AppScaleBadArg('Exceeded maximum allocated IDs')

    try:
      last_reserved = yield self._get_last_reserved()
    except TRANSIENT_CASSANDRA_ERRORS:
      raise AppScaleDBConnectionError('Unable to get last reserved ID')

    # Instead of returning an error, the API returns an invalid range.
    if last_reserved >= max_id:
      raise gen.Return((last_reserved + 1, last_reserved))

    try:
      yield self._set_reserved(last_reserved, max_id)
    except ReservationFailed:
      start_id, end_id = yield self.allocate_max(max_id, retries=retries-1)
      raise gen.Return((start_id, end_id))

    start_id = last_reserved + 1
    end_id = max_id
    raise gen.Return((start_id, end_id))

  @gen.coroutine
  def set_min_counter(self, counter):
    """ Ensures the counter is at least as large as the given value.

    Args:
      counter: An integer specifying the minimum counter value.
    """
    if (self._last_reserved_cache is not None and
        self._last_reserved_cache >= counter):
      return

    yield self.allocate_max(counter)
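
Since every public method is a coroutine, synchronous callers can drive the allocator through the IOLoop. A hypothetical example that blocks until a block of 100 IDs is reserved (session and project ID are assumptions):

from tornado.ioloop import IOLoop

allocator = EntityIDAllocator(session, 'guestbook')
start_id, end_id = IOLoop.current().run_sync(
  lambda: allocator.allocate_size(100))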
Example No. 9
class DatastoreProxy(AppDBInterface):
  """
    Cassandra implementation of the AppDBInterface
  """
  def __init__(self, log_level=logging.INFO, hosts=None):
    """
    Constructor.
    """
    class_name = self.__class__.__name__
    self.logger = logging.getLogger(class_name)
    self.logger.setLevel(log_level)
    self.logger.info('Starting {}'.format(class_name))

    if hosts is not None:
      self.hosts = hosts
    else:
      self.hosts = appscale_info.get_db_ips()

    remaining_retries = INITIAL_CONNECT_RETRIES
    while True:
      try:
        self.cluster = Cluster(self.hosts, default_retry_policy=BASIC_RETRIES,
                               load_balancing_policy=LB_POLICY)
        self.session = self.cluster.connect(KEYSPACE)
        self.tornado_cassandra = TornadoCassandra(self.session)
        break
      except cassandra.cluster.NoHostAvailable as connection_error:
        remaining_retries -= 1
        if remaining_retries < 0:
          raise connection_error
        time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM
    self.prepared_statements = {}

    # Provide synchronous version of some async methods
    self.batch_get_entity_sync = tornado_synchronous(self.batch_get_entity)
    self.batch_put_entity_sync = tornado_synchronous(self.batch_put_entity)
    self.batch_delete_sync = tornado_synchronous(self.batch_delete)
    self.valid_data_version_sync = tornado_synchronous(self.valid_data_version)
    self.range_query_sync = tornado_synchronous(self.range_query)
    self.get_metadata_sync = tornado_synchronous(self.get_metadata)
    self.set_metadata_sync = tornado_synchronous(self.set_metadata)
    self.delete_table_sync = tornado_synchronous(self.delete_table)

  def close(self):
    """ Close all sessions and connections to Cassandra. """
    self.cluster.shutdown()

  @gen.coroutine
  def batch_get_entity(self, table_name, row_keys, column_names):
    """
    Takes in batches of keys and retrieves their corresponding rows.

    Args:
      table_name: The table to access
      row_keys: A list of keys to access
      column_names: A list of columns to access
    Returns:
      A dictionary of rows and columns/values of those rows, in the form
      {key: {column_name: value, ...}}.
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the batch_get could not be performed due to
        an error with Cassandra.
    """
    if not isinstance(table_name, str):
      raise TypeError("Expected a str")
    if not isinstance(column_names, list):
      raise TypeError("Expected a list")
    if not isinstance(row_keys, list):
      raise TypeError("Expected a list")

    row_keys_bytes = [bytearray(row_key) for row_key in row_keys]

    statement = 'SELECT * FROM "{table}" '\
                'WHERE {key} IN %s and {column} IN %s'.format(
                  table=table_name,
                  key=ThriftColumn.KEY,
                  column=ThriftColumn.COLUMN_NAME,
                )
    query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)

    results = []
    # Split the rows up into chunks to reduce the likelihood of timeouts.
    chunk_indexes = [
      (n, n + ENTITY_FETCH_THRESHOLD)
      for n in xrange(0, len(row_keys_bytes), ENTITY_FETCH_THRESHOLD)]

    # TODO: This can be made more efficient by maintaining a constant number
    # of concurrent requests rather than waiting for each batch to complete.
    for start, end in chunk_indexes:
      parameters = (ValueSequence(row_keys_bytes[start:end]),
                    ValueSequence(column_names))
      try:
        batch_results = yield self.tornado_cassandra.execute(
          query, parameters=parameters)
      except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
        message = 'Exception during batch_get_entity'
        logger.exception(message)
        raise AppScaleDBConnectionError(message)

      results.extend(list(batch_results))

    results_dict = {row_key: {} for row_key in row_keys}
    for (key, column, value) in results:
      if key not in results_dict:
        results_dict[key] = {}

      results_dict[key][column] = value

    raise gen.Return(results_dict)

  @gen.coroutine
  def batch_put_entity(self, table_name, row_keys, column_names, cell_values,
                       ttl=None):
    """
    Allows callers to store multiple rows with a single call. A row can
    have multiple columns and values with them. We refer to each row as
    an entity.

    Args:
      table_name: The table to mutate
      row_keys: A list of keys to store on
      column_names: A list of columns to mutate
      cell_values: A dict of key/value pairs
      ttl: The number of seconds to keep the row.
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the batch_put could not be performed due to
        an error with Cassandra.
    """
    if not isinstance(table_name, str):
      raise TypeError("Expected a str")
    if not isinstance(column_names, list):
      raise TypeError("Expected a list")
    if not isinstance(row_keys, list):
      raise TypeError("Expected a list")
    if not isinstance(cell_values, dict):
      raise TypeError("Expected a dict")

    insert_str = (
      'INSERT INTO "{table}" ({key}, {column}, {value}) '
      'VALUES (?, ?, ?)'
    ).format(table=table_name,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME,
             value=ThriftColumn.VALUE)

    if ttl is not None:
      # A leading space is required so the TTL clause does not run into the
      # VALUES clause.
      insert_str += ' USING TTL {}'.format(ttl)

    statement = self.session.prepare(insert_str)

    statements_and_params = []
    for row_key in row_keys:
      for column in column_names:
        params = (bytearray(row_key), column,
                  bytearray(cell_values[row_key][column]))
        statements_and_params.append((statement, params))

    try:
      yield [
        self.tornado_cassandra.execute(statement, parameters=params)
        for statement, params in statements_and_params
      ]
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception during batch_put_entity'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)

  def prepare_insert(self, table):
    """ Prepare an insert statement.

    Args:
      table: A string containing the table name.
    Returns:
      A PreparedStatement object.
    """
    statement = (
      'INSERT INTO "{table}" ({key}, {column}, {value}) '
      'VALUES (?, ?, ?) '
      'USING TIMESTAMP ?'
    ).format(table=table,
             key=ThriftColumn.KEY,
             column=ThriftColumn.COLUMN_NAME,
             value=ThriftColumn.VALUE)

    if statement not in self.prepared_statements:
      self.prepared_statements[statement] = self.session.prepare(statement)

    return self.prepared_statements[statement]

  def prepare_delete(self, table):
    """ Prepare a delete statement.

    Args:
      table: A string containing the table name.
    Returns:
      A PreparedStatement object.
    """
    statement = (
      'DELETE FROM "{table}" '
      'USING TIMESTAMP ? '
      'WHERE {key} = ?'
    ).format(table=table, key=ThriftColumn.KEY)

    if statement not in self.prepared_statements:
      self.prepared_statements[statement] = self.session.prepare(statement)

    return self.prepared_statements[statement]

  @gen.coroutine
  def normal_batch(self, mutations, txid):
    """ Use Cassandra's native batch statement to apply mutations atomically.

    Args:
      mutations: A list of dictionaries representing mutations.
      txid: An integer specifying a transaction ID.
    """
    self.logger.debug('Normal batch: {} mutations'.format(len(mutations)))
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                           retry_policy=BASIC_RETRIES)
    prepared_statements = {'insert': {}, 'delete': {}}
    for mutation in mutations:
      table = mutation['table']

      if table == 'group_updates':
        key = mutation['key']
        insert = (
          'INSERT INTO group_updates (group, last_update) '
          'VALUES (%(group)s, %(last_update)s) '
          'USING TIMESTAMP %(timestamp)s'
        )
        parameters = {'group': key, 'last_update': mutation['last_update'],
                      'timestamp': get_write_time(txid)}
        batch.add(insert, parameters)
        continue

      if mutation['operation'] == Operations.PUT:
        if table not in prepared_statements['insert']:
          prepared_statements['insert'][table] = self.prepare_insert(table)
        values = mutation['values']
        for column in values:
          batch.add(
            prepared_statements['insert'][table],
            (bytearray(mutation['key']), column, bytearray(values[column]),
             get_write_time(txid))
          )
      elif mutation['operation'] == Operations.DELETE:
        if table not in prepared_statements['delete']:
          prepared_statements['delete'][table] = self.prepare_delete(table)
        batch.add(
          prepared_statements['delete'][table],
          (get_write_time(txid), bytearray(mutation['key']))
        )

    try:
      yield self.tornado_cassandra.execute(batch)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Unable to apply batch'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)

  def statements_for_mutations(self, mutations, txid):
    """ Generates Cassandra statements for a list of mutations.

    Args:
      mutations: A list of dictionaries representing mutations.
      txid: An integer specifying a transaction ID.
    Returns:
      A list of tuples containing Cassandra statements and parameters.
    """
    prepared_statements = {'insert': {}, 'delete': {}}
    statements_and_params = []
    for mutation in mutations:
      table = mutation['table']

      if table == 'group_updates':
        key = mutation['key']
        insert = (
          'INSERT INTO group_updates (group, last_update) '
          'VALUES (%(group)s, %(last_update)s) '
          'USING TIMESTAMP %(timestamp)s'
        )
        parameters = {'group': key, 'last_update': mutation['last_update'],
                      'timestamp': get_write_time(txid)}
        statements_and_params.append((SimpleStatement(insert), parameters))
        continue

      if mutation['operation'] == Operations.PUT:
        if table not in prepared_statements['insert']:
          prepared_statements['insert'][table] = self.prepare_insert(table)
        values = mutation['values']
        for column in values:
          params = (bytearray(mutation['key']), column,
                    bytearray(values[column]), get_write_time(txid))
          statements_and_params.append(
            (prepared_statements['insert'][table], params))
      elif mutation['operation'] == Operations.DELETE:
        if table not in prepared_statements['delete']:
          prepared_statements['delete'][table] = self.prepare_delete(table)
        params = (get_write_time(txid), bytearray(mutation['key']))
        statements_and_params.append(
          (prepared_statements['delete'][table], params))

    return statements_and_params

  @gen.coroutine
  def apply_mutations(self, mutations, txid):
    """ Apply mutations across tables.

    Args:
      mutations: A list of dictionaries representing mutations.
      txid: An integer specifying a transaction ID.
    """
    statements_and_params = self.statements_for_mutations(mutations, txid)
    yield [
      self.tornado_cassandra.execute(statement, parameters=params)
      for statement, params in statements_and_params
    ]

  @gen.coroutine
  def large_batch(self, app, mutations, entity_changes, txn):
    """ Insert or delete multiple rows across tables in an atomic statement.

    Args:
      app: A string containing the application ID.
      mutations: A list of dictionaries representing mutations.
      entity_changes: A list of changes at the entity level.
      txn: A transaction ID handler.
    Raises:
      FailedBatch if a concurrent process modifies the batch status.
      AppScaleDBConnectionError if a database connection error was encountered.
    """
    self.logger.debug('Large batch: transaction {}, {} mutations'.
                      format(txn, len(mutations)))
    large_batch = LargeBatch(self.session, app, txn)
    try:
      yield large_batch.start()
    except FailedBatch as batch_error:
      raise BatchNotApplied(str(batch_error))

    insert_item = (
      'INSERT INTO batches (app, transaction, namespace, '
      '                     path, old_value, new_value) '
      'VALUES (?, ?, ?, ?, ?, ?)'
    )
    insert_statement = self.session.prepare(insert_item)

    statements_and_params = []
    for entity_change in entity_changes:
      old_value = None
      if entity_change['old'] is not None:
        old_value = bytearray(entity_change['old'].Encode())
      new_value = None
      if entity_change['new'] is not None:
        new_value = bytearray(entity_change['new'].Encode())

      parameters = (app, txn, entity_change['key'].name_space(),
                    bytearray(entity_change['key'].path().Encode()), old_value,
                    new_value)
      statements_and_params.append((insert_statement, parameters))

    try:
      yield [
        self.tornado_cassandra.execute(statement, parameters=params)
        for statement, params in statements_and_params
      ]
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Unable to write large batch log'
      logger.exception(message)
      raise BatchNotApplied(message)

    # Since failing after this point is expensive and time consuming, retry
    # operations to make a failure less likely.
    custom_retry_coroutine = retry_raw_coroutine(
      backoff_threshold=5, retrying_timeout=10,
      retry_on_exception=dbconstants.TRANSIENT_CASSANDRA_ERRORS)

    persistent_apply_batch = custom_retry_coroutine(large_batch.set_applied)
    try:
      yield persistent_apply_batch()
    except FailedBatch as batch_error:
      raise AppScaleDBConnectionError(str(batch_error))

    persistent_apply_mutations = custom_retry_coroutine(self.apply_mutations)
    try:
      yield persistent_apply_mutations(mutations, txn)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception during large batch'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)

    try:
      yield large_batch.cleanup()
    except FailedBatch:
      # This should not raise an exception since the batch is already applied.
      logger.exception('Unable to clear batch status')

    clear_batch = (
      'DELETE FROM batches '
      'WHERE app = %(app)s AND transaction = %(transaction)s'
    )
    parameters = {'app': app, 'transaction': txn}
    try:
      yield self.tornado_cassandra.execute(clear_batch, parameters)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      logger.exception('Unable to clear batch log')
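
  # A sketch of the phases implemented above, for reference (a best-effort
  # atomic cross-table commit on top of Cassandra):
  #   1. large_batch.start()       - claim the batch status row for this txn.
  #   2. write the batch log       - record old/new values in 'batches'.
  #   3. large_batch.set_applied() - mark the logged mutations authoritative.
  #   4. apply_mutations()         - write the mutations to the data tables.
  #   5. cleanup() + DELETE        - drop the status row and the log entries.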

  @gen.coroutine
  def batch_delete(self, table_name, row_keys, column_names=()):
    """
    Remove a set of rows corresponding to a set of keys.

    Args:
      table_name: Table to delete rows from
      row_keys: A list of keys to remove
      column_names: Not used
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the batch_delete could not be performed due
        to an error with Cassandra.
    """
    if not isinstance(table_name, str): raise TypeError("Expected a str")
    if not isinstance(row_keys, list): raise TypeError("Expected a list")

    row_keys_bytes = [bytearray(row_key) for row_key in row_keys]

    statement = 'DELETE FROM "{table}" WHERE {key} IN %s'.\
      format(
        table=table_name,
        key=ThriftColumn.KEY
      )
    query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)
    parameters = (ValueSequence(row_keys_bytes),)

    try:
      yield self.tornado_cassandra.execute(query, parameters=parameters)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception during batch_delete'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)
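
  # A minimal usage sketch (hypothetical table, keys, and DatastoreProxy
  # instance 'db'; the batch_delete_sync wrapper from the constructor can
  # drive the coroutine outside an IOLoop):
  #
  #   yield db.batch_delete('app_entities', ['row-key-1', 'row-key-2'])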

  @gen.coroutine
  def delete_table(self, table_name):
    """
    Drops a given table (aka column family in Cassandra)

    Args:
      table_name: A string name of the table to drop
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the delete_table could not be performed due
        to an error with Cassandra.
    """
    if not isinstance(table_name, str): raise TypeError("Expected a str")

    statement = 'DROP TABLE IF EXISTS "{table}"'.format(table=table_name)
    query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)

    try:
      yield self.tornado_cassandra.execute(query)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception during delete_table'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)

  @gen.coroutine
  def create_table(self, table_name, column_names):
    """
    Creates a table if it doesn't already exist.

    Args:
      table_name: The column family name
      column_names: Not used but here to match the interface
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the create_table could not be performed due
        to an error with Cassandra.
    """
    if not isinstance(table_name, str): raise TypeError("Expected a str")
    if not isinstance(column_names, list): raise TypeError("Expected a list")

    statement = (
      'CREATE TABLE IF NOT EXISTS "{table}" ('
      '{key} blob,'
      '{column} text,'
      '{value} blob,'
      'PRIMARY KEY ({key}, {column})'
      ') WITH COMPACT STORAGE'
    ).format(
      table=table_name,
      key=ThriftColumn.KEY,
      column=ThriftColumn.COLUMN_NAME,
      value=ThriftColumn.VALUE
    )
    query = SimpleStatement(statement, retry_policy=NO_RETRIES)

    try:
      yield self.tornado_cassandra.execute(query, timeout=SCHEMA_CHANGE_TIMEOUT)
    except cassandra.OperationTimedOut:
      logger.warning(
        'Encountered an operation timeout while creating a table. Waiting {} '
        'seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT))
      time.sleep(SCHEMA_CHANGE_TIMEOUT)
      raise AppScaleDBConnectionError('Exception during create_table')
    except tuple(error for error in dbconstants.TRANSIENT_CASSANDRA_ERRORS
                 if error is not cassandra.OperationTimedOut):
      message = 'Exception during create_table'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)
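
  # A minimal usage sketch (hypothetical table name and DatastoreProxy
  # instance 'db'; column_names is accepted only to match the interface and
  # is not used in the CQL above):
  #
  #   yield db.create_table('app_entities', ['reference', 'entity'])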

  @gen.coroutine
  def range_query(self,
                  table_name,
                  column_names,
                  start_key,
                  end_key,
                  limit,
                  offset=0,
                  start_inclusive=True,
                  end_inclusive=True,
                  keys_only=False):
    """
    Gets a dense range ordered by keys. Returns an ordered list of
    dictionaries, [{key: {column1: value1, column2: value2}}, ...],
    or a list of keys if keys_only is set.

    Args:
      table_name: Name of table to access
      column_names: Columns which get returned within the key range
      start_key: String the query starts at
      end_key: String the query ends at
      limit: Maximum number of results to return
      offset: Number of leading results to skip ([offset:])
      start_inclusive: Boolean, whether results should include the start_key
      end_inclusive: Boolean, whether results should include the end_key
      keys_only: Boolean, whether to return only keys and not values
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the range_query could not be performed due
        to an error with Cassandra.
    Returns:
      An ordered list of dictionaries of key=>columns/values
    """
    if not isinstance(table_name, str):
      raise TypeError('table_name must be a string')
    if not isinstance(column_names, list):
      raise TypeError('column_names must be a list')
    if not isinstance(start_key, str):
      raise TypeError('start_key must be a string')
    if not isinstance(end_key, str):
      raise TypeError('end_key must be a string')
    if not isinstance(limit, (int, long)) and limit is not None:
      raise TypeError('limit must be int, long, or NoneType')
    if not isinstance(offset, (int, long)):
      raise TypeError('offset must be int or long')

    if start_inclusive:
      gt_compare = '>='
    else:
      gt_compare = '>'

    if end_inclusive:
      lt_compare = '<='
    else:
      lt_compare = '<'

    query_limit = ''
    if limit is not None:
      query_limit = 'LIMIT {}'.format(len(column_names) * limit)

    statement = (
      'SELECT * FROM "{table}" WHERE '
      'token({key}) {gt_compare} %s AND '
      'token({key}) {lt_compare} %s AND '
      '{column} IN %s '
      '{limit} '
      'ALLOW FILTERING'
    ).format(table=table_name,
             key=ThriftColumn.KEY,
             gt_compare=gt_compare,
             lt_compare=lt_compare,
             column=ThriftColumn.COLUMN_NAME,
             limit=query_limit)

    query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)
    parameters = (bytearray(start_key), bytearray(end_key),
                  ValueSequence(column_names))

    try:
      results = yield self.tornado_cassandra.execute(
        query, parameters=parameters)

      results_list = []
      current_item = {}
      current_key = None
      for (key, column, value) in results:
        if keys_only:
          results_list.append(key)
          continue

        if key != current_key:
          if current_item:
            results_list.append({current_key: current_item})
          current_item = {}
          current_key = key

        current_item[column] = value
      if current_item:
        results_list.append({current_key: current_item})
      raise gen.Return(results_list[offset:])
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception during range_query'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)
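
  # A minimal usage sketch (hypothetical table, keys, and columns; note the
  # token() predicates above only act as a byte-ordered key range under an
  # order-preserving partitioner, as in AppScale's Cassandra setup):
  #
  #   rows = yield db.range_query('app_entities', ['reference', 'entity'],
  #                               start_key='', end_key='\xff', limit=100)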

  @gen.coroutine
  def get_metadata(self, key):
    """ Retrieve a value from the datastore metadata table.

    Args:
      key: A string containing the key to fetch.
    Returns:
      A string containing the value or None if the key is not present.
    """
    statement = (
      'SELECT {value} FROM "{table}" '
      'WHERE {key} = %s '
      'AND {column} = %s'
    ).format(
      value=ThriftColumn.VALUE,
      table=dbconstants.DATASTORE_METADATA_TABLE,
      key=ThriftColumn.KEY,
      column=ThriftColumn.COLUMN_NAME
    )
    try:
      results = yield self.tornado_cassandra.execute(
        statement, (bytearray(key), key))
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Unable to fetch {} from datastore metadata'.format(key)
      logger.exception(message)
      raise AppScaleDBConnectionError(message)

    try:
      raise gen.Return(results[0].value)
    except IndexError:
      return

  @gen.coroutine
  def set_metadata(self, key, value):
    """ Set a datastore metadata value.

    Args:
      key: A string containing the key to set.
      value: A string containing the value to set.
    """
    if not isinstance(key, str):
      raise TypeError('key should be a string')

    if not isinstance(value, str):
      raise TypeError('value should be a string')

    statement = (
      'INSERT INTO "{table}" ({key}, {column}, {value}) '
      'VALUES (%(key)s, %(column)s, %(value)s)'
    ).format(
      table=dbconstants.DATASTORE_METADATA_TABLE,
      key=ThriftColumn.KEY,
      column=ThriftColumn.COLUMN_NAME,
      value=ThriftColumn.VALUE
    )
    parameters = {'key': bytearray(key),
                  'column': key,
                  'value': bytearray(value)}
    try:
      yield self.tornado_cassandra.execute(statement, parameters)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Unable to set datastore metadata for {}'.format(key)
      logger.exception(message)
      raise AppScaleDBConnectionError(message)
    except cassandra.InvalidRequest:
      yield self.create_table(dbconstants.DATASTORE_METADATA_TABLE,
                              dbconstants.DATASTORE_METADATA_SCHEMA)
      yield self.tornado_cassandra.execute(statement, parameters)
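
  # A minimal round-trip sketch for the metadata pair above, using the
  # version key consumed by valid_data_version below:
  #
  #   yield db.set_metadata(VERSION_INFO_KEY, str(CURRENT_VERSION))
  #   version = yield db.get_metadata(VERSION_INFO_KEY)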

  @gen.coroutine
  def valid_data_version(self):
    """ Checks whether or not the data layout can be used.

    Returns:
      A boolean.
    """
    try:
      version = yield self.get_metadata(VERSION_INFO_KEY)
    except cassandra.InvalidRequest:
      raise gen.Return(False)

    is_expected_version = (
      version is not None and
      float(version) == CURRENT_VERSION
    )
    raise gen.Return(is_expected_version)

  @gen.coroutine
  def group_updates(self, groups):
    """ Fetch the latest transaction IDs for each group.

    Args:
      groups: An iterable containing encoded Reference objects.
    Returns:
      A set of integers specifying transaction IDs.
    """
    query = 'SELECT * FROM group_updates WHERE group=%s'
    results = yield [
      self.tornado_cassandra.execute(query, [bytearray(group)])
      for group in groups
    ]
    updates = set(rows[0].last_update for rows in results if rows)
    raise gen.Return(updates)
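
  # A minimal usage sketch (hypothetical group references; yields the set of
  # last-committed transaction IDs recorded for those entity groups):
  #
  #   group_txids = yield db.group_updates(
  #     [group_key.Encode() for group_key in group_keys])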

  @gen.coroutine
  def start_transaction(self, app, txid, is_xg, in_progress):
    """ Persist transaction metadata.

    Args:
      app: A string containing an application ID.
      txid: An integer specifying the transaction ID.
      is_xg: A boolean specifying that the transaction is cross-group.
      in_progress: An iterable containing transaction IDs.
    """
    if in_progress:
      in_progress_bin = bytearray(
        struct.pack('q' * len(in_progress), *in_progress))
    else:
      in_progress_bin = None

    insert = (
      'INSERT INTO transactions (txid_hash, operation, namespace, path,'
      '                          start_time, is_xg, in_progress) '
      'VALUES (%(txid_hash)s, %(operation)s, %(namespace)s, %(path)s,'
      '        %(start_time)s, %(is_xg)s, %(in_progress)s) '
      'USING TTL {ttl}'
    ).format(ttl=dbconstants.MAX_TX_DURATION * 2)
    parameters = {'txid_hash': tx_partition(app, txid),
                  'operation': TxnActions.START,
                  'namespace': '',
                  'path': bytearray(''),
                  'start_time': datetime.datetime.utcnow(),
                  'is_xg': is_xg,
                  'in_progress': in_progress_bin}

    try:
      yield self.tornado_cassandra.execute(insert, parameters)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception while starting a transaction'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)
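
  # The in_progress blob is just concatenated 64-bit integers; a worked
  # example of the encoding (native byte order, shown for little-endian):
  #
  #   >>> struct.pack('q' * 2, 100, 101)
  #   'd\x00\x00\x00\x00\x00\x00\x00e\x00\x00\x00\x00\x00\x00\x00'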

  @gen.coroutine
  def put_entities_tx(self, app, txid, entities):
    """ Update transaction metadata with new put operations.

    Args:
      app: A string containing an application ID.
      txid: An integer specifying the transaction ID.
      entities: A list of entities that will be put upon commit.
    """
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                           retry_policy=BASIC_RETRIES)
    insert = self.session.prepare("""
      INSERT INTO transactions (txid_hash, operation, namespace, path, entity)
      VALUES (?, ?, ?, ?, ?)
      USING TTL {ttl}
    """.format(ttl=dbconstants.MAX_TX_DURATION * 2))

    for entity in entities:
      args = (tx_partition(app, txid),
              TxnActions.MUTATE,
              entity.key().name_space(),
              bytearray(entity.key().path().Encode()),
              bytearray(entity.Encode()))
      batch.add(insert, args)

    try:
      yield self.tornado_cassandra.execute(batch)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception while putting entities in a transaction'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)

  @gen.coroutine
  def delete_entities_tx(self, app, txid, entity_keys):
    """ Update transaction metadata with new delete operations.

    Args:
      app: A string containing an application ID.
      txid: An integer specifying the transaction ID.
      entity_keys: A list of entity keys that will be deleted upon commit.
    """
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                           retry_policy=BASIC_RETRIES)
    insert = self.session.prepare("""
      INSERT INTO transactions (txid_hash, operation, namespace, path, entity)
      VALUES (?, ?, ?, ?, ?)
      USING TTL {ttl}
    """.format(ttl=dbconstants.MAX_TX_DURATION * 2))

    for key in entity_keys:
      # The None value overwrites previous puts.
      args = (tx_partition(app, txid),
              TxnActions.MUTATE,
              key.name_space(),
              bytearray(key.path().Encode()),
              None)
      batch.add(insert, args)

    try:
      yield self.tornado_cassandra.execute(batch)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception while deleting entities in a transaction'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)

  @gen.coroutine
  def transactional_tasks_count(self, app, txid):
    """ Count the number of existing tasks associated with the transaction.

    Args:
      app: A string specifying an application ID.
      txid: An integer specifying a transaction ID.
    Returns:
      An integer specifying the number of existing tasks.
    """
    select = (
      'SELECT count(*) FROM transactions '
      'WHERE txid_hash = %(txid_hash)s '
      'AND operation = %(operation)s'
    )
    parameters = {'txid_hash': tx_partition(app, txid),
                  'operation': TxnActions.ENQUEUE_TASK}
    try:
      result = yield self.tornado_cassandra.execute(select, parameters)
      raise gen.Return(result[0].count)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception while fetching task count'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)

  @gen.coroutine
  def add_transactional_tasks(self, app, txid, tasks, service_id, version_id):
    """ Add tasks to be enqueued upon the completion of a transaction.

    Args:
      app: A string specifying an application ID.
      txid: An integer specifying a transaction ID.
      tasks: A list of TaskQueueAddRequest objects.
      service_id: A string specifying the client's service ID.
      version_id: A string specifying the client's version ID.
    """
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                           retry_policy=BASIC_RETRIES)
    query_str = (
      'INSERT INTO transactions (txid_hash, operation, namespace, path, task) '
      'VALUES (?, ?, ?, ?, ?) '
      'USING TTL {ttl}'
    ).format(ttl=dbconstants.MAX_TX_DURATION * 2)
    insert = self.session.prepare(query_str)

    for task in tasks:
      task.clear_transaction()

      # The path for the task entry doesn't matter as long as it's unique.
      path = bytearray(str(uuid.uuid4()))

      task_payload = '_'.join([service_id, version_id, task.Encode()])
      args = (tx_partition(app, txid),
              TxnActions.ENQUEUE_TASK,
              '',
              path,
              task_payload)
      batch.add(insert, args)

    try:
      yield self.tornado_cassandra.execute(batch)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception while adding tasks in a transaction'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)
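
  # The task payload above is a single '_'-joined string, and
  # get_transaction_metadata below recovers it with split('_', 2), so the
  # service and version IDs must not contain underscores:
  #
  #   payload = '_'.join(['default', 'v1', task.Encode()])
  #   service_id, version_id, task_pb = payload.split('_', 2)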

  @gen.coroutine
  def record_reads(self, app, txid, group_keys):
    """ Keep track of which entity groups were read in a transaction.

    Args:
      app: A string specifying an application ID.
      txid: An integer specifying a transaction ID.
      group_keys: An iterable containing Reference objects.
    """
    batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                           retry_policy=BASIC_RETRIES)
    insert = self.session.prepare("""
      INSERT INTO transactions (txid_hash, operation, namespace, path)
      VALUES (?, ?, ?, ?)
      USING TTL {ttl}
    """.format(ttl=dbconstants.MAX_TX_DURATION * 2))

    for group_key in group_keys:
      if not isinstance(group_key, entity_pb.Reference):
        group_key = entity_pb.Reference(group_key)

      args = (tx_partition(app, txid),
              TxnActions.GET,
              group_key.name_space(),
              bytearray(group_key.path().Encode()))
      batch.add(insert, args)

    try:
      yield self.tornado_cassandra.execute(batch)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception while recording reads in a transaction'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)

  @gen.coroutine
  def get_transaction_metadata(self, app, txid):
    """ Fetch transaction state.

    Args:
      app: A string specifying an application ID.
      txid: An integer specifying a transaction ID.
    Returns:
      A dictionary containing transaction state.
    """
    select = (
      'SELECT namespace, operation, path, start_time, is_xg, in_progress, '
      '       entity, task '
      'FROM transactions '
      'WHERE txid_hash = %(txid_hash)s '
    )
    parameters = {'txid_hash': tx_partition(app, txid)}
    try:
      results = yield self.tornado_cassandra.execute(select, parameters)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      message = 'Exception while fetching transaction metadata'
      logger.exception(message)
      raise AppScaleDBConnectionError(message)

    metadata = {'puts': {}, 'deletes': [], 'tasks': [], 'reads': set()}
    for result in results:
      if result.operation == TxnActions.START:
        metadata['start'] = result.start_time
        metadata['is_xg'] = result.is_xg
        metadata['in_progress'] = set()
        if result.in_progress:
          metadata['in_progress'] = set(
            struct.unpack('q' * int(len(result.in_progress) / 8),
                          result.in_progress))
      if result.operation == TxnActions.MUTATE:
        key = create_key(app, result.namespace, result.path)
        if result.entity is None:
          metadata['deletes'].append(key)
        else:
          metadata['puts'][key.Encode()] = result.entity
      if result.operation == TxnActions.GET:
        group_key = create_key(app, result.namespace, result.path)
        metadata['reads'].add(group_key.Encode())
      if result.operation == TxnActions.ENQUEUE_TASK:
        service_id, version_id, task_pb = result.task.split('_', 2)
        task_metadata = {
          'service_id': service_id,
          'version_id': version_id,
          'task': taskqueue_service_pb.TaskQueueAddRequest(task_pb)}
        metadata['tasks'].append(task_metadata)
    raise gen.Return(metadata)
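
  # A sketch of the returned shape (keys as populated above; the values
  # shown are hypothetical):
  #
  #   {'start': datetime_obj, 'is_xg': False, 'in_progress': {100, 101},
  #    'puts': {encoded_key: encoded_entity}, 'deletes': [key_pb],
  #    'tasks': [{'service_id': ..., 'version_id': ..., 'task': add_request}],
  #    'reads': {encoded_group_key}}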
Exemplo n.º 17
0
class DatastoreProxy(AppDBInterface):
    def __init__(self):
        hosts = appscale_info.get_db_ips()

        remaining_retries = INITIAL_CONNECT_RETRIES
        while True:
            try:
                cluster = Cluster(hosts, load_balancing_policy=LB_POLICY)
                self.session = cluster.connect(keyspace=KEYSPACE)
                self.tornado_cassandra = TornadoCassandra(self.session)
                break
            except cassandra.cluster.NoHostAvailable as connection_error:
                remaining_retries -= 1
                if remaining_retries < 0:
                    raise connection_error
                time.sleep(3)

        self.session.default_consistency_level = ConsistencyLevel.QUORUM

        # Provide synchronous version of get_schema method
        self.get_schema_sync = tornado_synchronous(self.get_schema)

    @gen.coroutine
    def get_entity(self, table_name, row_key, column_names):
        error = [ERROR_DEFAULT]
        list_ = error
        row_key = bytearray('/'.join([table_name, row_key]))
        statement = """
      SELECT * FROM "{table}"
      WHERE {key} = %(key)s
      AND {column} IN %(columns)s
    """.format(table=table_name,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME)
        query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)
        parameters = {'key': row_key, 'columns': ValueSequence(column_names)}
        try:
            results = yield self.tornado_cassandra.execute(query, parameters)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            raise AppScaleDBConnectionError('Unable to fetch entity')

        results_dict = {}
        for (_, column, value) in results:
            results_dict[column] = value

        if not results_dict:
            list_[0] += 'Not found'
            raise gen.Return(list_)

        for column in column_names:
            list_.append(results_dict[column])
        raise gen.Return(list_)

    @gen.coroutine
    def put_entity(self, table_name, row_key, column_names, cell_values):
        error = [ERROR_DEFAULT]
        list_ = error

        row_key = bytearray('/'.join([table_name, row_key]))
        values = {}
        for index, column in enumerate(column_names):
            values[column] = cell_values[index]

        statement = """
      INSERT INTO "{table}" ({key}, {column}, {value})
      VALUES (%(key)s, %(column)s, %(value)s)
    """.format(table=table_name,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME,
               value=ThriftColumn.VALUE)
        batch = BatchStatement(retry_policy=BASIC_RETRIES)
        for column in column_names:
            parameters = {
                'key': row_key,
                'column': column,
                'value': bytearray(values[column])
            }
            batch.add(statement, parameters)

        try:
            yield self.tornado_cassandra.execute(batch)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            list_[0] += 'Unable to insert entity'
            raise gen.Return(list_)

        list_.append("0")
        raise gen.Return(list_)

    def put_entity_dict(self, table_name, row_key, value_dict):
        raise NotImplementedError("put_entity_dict is not implemented in %s." %
                                  self.__class__)

    @gen.coroutine
    def get_table(self, table_name, column_names):
        """ Fetch a list of values for the given columns in a table.

    Args:
      table_name: A string containing the name of the table.
      column_names: A list of column names to retrieve values for.
    Returns:
      A list containing a status marker followed by the values.
      Note: The response does not contain any row keys or column names.
    """
        response = [ERROR_DEFAULT]

        statement = 'SELECT * FROM "{table}"'.format(table=table_name)
        query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)

        try:
            results = yield self.tornado_cassandra.execute(query)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            response[0] += 'Unable to fetch table contents'
            raise gen.Return(response)

        results_list = []
        current_item = {}
        current_key = None
        for (key, column, value) in results:
            if key != current_key:
                if current_item:
                    results_list.append({current_key: current_item})
                current_item = {}
                current_key = key

            current_item[column] = value
        if current_item:
            results_list.append({current_key: current_item})

        for result in results_list:
            result_columns = result.values()[0]
            for column in column_names:
                try:
                    response.append(result_columns[column])
                except KeyError:
                    response[0] += 'Table contents did not match schema'
                    raise gen.Return(response)

        raise gen.Return(response)

    @gen.coroutine
    def delete_row(self, table_name, row_key):
        response = [ERROR_DEFAULT]
        row_key = bytearray('/'.join([table_name, row_key]))

        statement = 'DELETE FROM "{table}" WHERE {key} = %s'.format(
            table=table_name, key=ThriftColumn.KEY)
        delete = SimpleStatement(statement, retry_policy=BASIC_RETRIES)

        try:
            yield self.tornado_cassandra.execute(delete, (row_key, ))
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            response[0] += 'Unable to delete row'
            raise gen.Return(response)

        response.append('0')
        raise gen.Return(response)

    @gen.coroutine
    def get_schema(self, table_name):
        error = [ERROR_DEFAULT]
        result = error
        ret = yield self.get_entity(SCHEMA_TABLE, table_name,
                                    SCHEMA_TABLE_SCHEMA)
        if len(ret) > 1:
            schema = ret[1]
        else:
            error[0] = ret[0] + "--unable to get schema"
            raise gen.Return(error)
        schema = schema.split(':')
        result = result + schema
        raise gen.Return(result)
Exemplo n.º 18
0
class DatastoreProxy(AppDBInterface):
  def __init__(self):
    hosts = appscale_info.get_db_ips()

    remaining_retries = INITIAL_CONNECT_RETRIES
    while True:
      try:
        cluster = Cluster(hosts, load_balancing_policy=LB_POLICY)
        self.session = cluster.connect(keyspace=KEYSPACE)
        self.tornado_cassandra = TornadoCassandra(self.session)
        break
      except cassandra.cluster.NoHostAvailable as connection_error:
        remaining_retries -= 1
        if remaining_retries < 0:
          raise connection_error
        time.sleep(3)

    self.session.default_consistency_level = ConsistencyLevel.QUORUM

    # Provide synchronous version of get_schema method
    self.get_schema_sync = tornado_synchronous(self.get_schema)

  @gen.coroutine
  def get_entity(self, table_name, row_key, column_names):
    error = [ERROR_DEFAULT]
    list_ = error
    row_key = bytearray('/'.join([table_name, row_key]))
    statement = """
      SELECT * FROM "{table}"
      WHERE {key} = %(key)s
      AND {column} IN %(columns)s
    """.format(table=table_name,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME)
    query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)
    parameters = {'key': row_key,
                  'columns': ValueSequence(column_names)}
    try:
      results = yield self.tornado_cassandra.execute(query, parameters)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      raise AppScaleDBConnectionError('Unable to fetch entity')

    results_dict = {}
    for (_, column, value) in results:
      results_dict[column] = value

    if not results_dict:
      list_[0] += 'Not found'
      raise gen.Return(list_)

    for column in column_names:
      list_.append(results_dict[column])
    raise gen.Return(list_)

  @gen.coroutine
  def put_entity(self, table_name, row_key, column_names, cell_values):
    error = [ERROR_DEFAULT]
    list_ = error

    row_key = bytearray('/'.join([table_name, row_key]))
    values = {}
    for index, column in enumerate(column_names):
      values[column] = cell_values[index]

    statement = """
      INSERT INTO "{table}" ({key}, {column}, {value})
      VALUES (%(key)s, %(column)s, %(value)s)
    """.format(table=table_name,
               key=ThriftColumn.KEY,
               column=ThriftColumn.COLUMN_NAME,
               value=ThriftColumn.VALUE)
    batch = BatchStatement(retry_policy=BASIC_RETRIES)
    for column in column_names:
      parameters = {'key': row_key,
                   'column': column,
                   'value': bytearray(values[column])}
      batch.add(statement, parameters)

    try:
      yield self.tornado_cassandra.execute(batch)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      list_[0] += 'Unable to insert entity'
      raise gen.Return(list_)

    list_.append("0")
    raise gen.Return(list_)

  def put_entity_dict(self, table_name, row_key, value_dict):
    raise NotImplementedError("put_entity_dict is not implemented in %s." % self.__class__)

  @gen.coroutine
  def get_table(self, table_name, column_names):
    """ Fetch a list of values for the given columns in a table.

    Args:
      table_name: A string containing the name of the table.
      column_names: A list of column names to retrieve values for.
    Returns:
      A list containing a status marker followed by the values.
      Note: The response does not contain any row keys or column names.
    """
    response = [ERROR_DEFAULT]

    statement = 'SELECT * FROM "{table}"'.format(table=table_name)
    query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)

    try:
      results = yield self.tornado_cassandra.execute(query)
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      response[0] += 'Unable to fetch table contents'
      raise gen.Return(response)

    results_list = []
    current_item = {}
    current_key = None
    for (key, column, value) in results:
      if key != current_key:
        if current_item:
          results_list.append({current_key: current_item})
        current_item = {}
        current_key = key

      current_item[column] = value
    if current_item:
      results_list.append({current_key: current_item})

    for result in results_list:
      result_columns = result.values()[0]
      for column in column_names:
        try:
          response.append(result_columns[column])
        except KeyError:
          response[0] += 'Table contents did not match schema'
          raise gen.Return(response)

    raise gen.Return(response)

  @gen.coroutine
  def delete_row(self, table_name, row_key):
    response = [ERROR_DEFAULT]
    row_key = bytearray('/'.join([table_name, row_key]))

    statement = 'DELETE FROM "{table}" WHERE {key} = %s'.format(
      table=table_name, key=ThriftColumn.KEY)
    delete = SimpleStatement(statement, retry_policy=BASIC_RETRIES)

    try:
      yield self.tornado_cassandra.execute(delete, (row_key,))
    except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
      response[0] += 'Unable to delete row'
      raise gen.Return(response)

    response.append('0')
    raise gen.Return(response)

  @gen.coroutine
  def get_schema(self, table_name):
    error = [ERROR_DEFAULT]
    result = error
    ret = yield self.get_entity(SCHEMA_TABLE, table_name, SCHEMA_TABLE_SCHEMA)
    if len(ret) > 1:
      schema = ret[1]
    else:
      error[0] = ret[0] + "--unable to get schema"
      raise gen.Return(error)
    schema = schema.split(':')
    result = result + schema
    raise gen.Return(result)
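
  # A sketch of the schema round trip (the schema row stores column names as
  # a single ':'-joined string; table and column names are hypothetical):
  #
  #   result = yield self.get_schema('app_entities')
  #   # result[0] is the status marker and result[1:] the column names,
  #   # e.g. ['reference', 'entity'] if the stored schema was 'reference:entity'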
Exemplo n.º 19
0
class DatastoreProxy(AppDBInterface):
    """
    Cassandra implementation of the AppDBInterface
  """
    def __init__(self, log_level=logging.INFO, hosts=None):
        """
    Constructor.
    """
        class_name = self.__class__.__name__
        self.logger = logging.getLogger(class_name)
        self.logger.setLevel(log_level)
        self.logger.info('Starting {}'.format(class_name))

        if hosts is not None:
            self.hosts = hosts
        else:
            self.hosts = appscale_info.get_db_ips()

        remaining_retries = INITIAL_CONNECT_RETRIES
        while True:
            try:
                self.cluster = Cluster(self.hosts,
                                       default_retry_policy=BASIC_RETRIES,
                                       load_balancing_policy=LB_POLICY)
                self.session = self.cluster.connect(KEYSPACE)
                self.tornado_cassandra = TornadoCassandra(self.session)
                break
            except cassandra.cluster.NoHostAvailable as connection_error:
                remaining_retries -= 1
                if remaining_retries < 0:
                    raise connection_error
                time.sleep(3)

        self.session.default_consistency_level = ConsistencyLevel.QUORUM
        self.prepared_statements = {}

        # Provide synchronous version of some async methods
        self.batch_get_entity_sync = tornado_synchronous(self.batch_get_entity)
        self.batch_put_entity_sync = tornado_synchronous(self.batch_put_entity)
        self.batch_delete_sync = tornado_synchronous(self.batch_delete)
        self.valid_data_version_sync = tornado_synchronous(
            self.valid_data_version)
        self.range_query_sync = tornado_synchronous(self.range_query)
        self.get_metadata_sync = tornado_synchronous(self.get_metadata)
        self.set_metadata_sync = tornado_synchronous(self.set_metadata)
        self.delete_table_sync = tornado_synchronous(self.delete_table)

    def close(self):
        """ Close all sessions and connections to Cassandra. """
        self.cluster.shutdown()

    @gen.coroutine
    def batch_get_entity(self, table_name, row_keys, column_names):
        """
    Takes in batches of keys and retrieves their corresponding rows.

    Args:
      table_name: The table to access
      row_keys: A list of keys to access
      column_names: A list of columns to access
    Returns:
      A dictionary of rows and columns/values of those rows. The format
      looks like such: {key:{column_name:value,...}}
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the batch_get could not be performed due to
        an error with Cassandra.
    """
        if not isinstance(table_name, str): raise TypeError("Expected a str")
        if not isinstance(column_names, list):
            raise TypeError("Expected a list")
        if not isinstance(row_keys, list): raise TypeError("Expected a list")

        row_keys_bytes = [bytearray(row_key) for row_key in row_keys]

        statement = 'SELECT * FROM "{table}" '\
                    'WHERE {key} IN %s and {column} IN %s'.format(
                      table=table_name,
                      key=ThriftColumn.KEY,
                      column=ThriftColumn.COLUMN_NAME,
                    )
        query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)

        results = []
        # Split the rows up into chunks to reduce the likelihood of timeouts.
        chunk_indexes = [
            (n, n + ENTITY_FETCH_THRESHOLD)
            for n in xrange(0, len(row_keys_bytes), ENTITY_FETCH_THRESHOLD)
        ]
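
        # A worked example of the chunking above, assuming a hypothetical
        # ENTITY_FETCH_THRESHOLD of 100 and 250 row keys:
        #   chunk_indexes == [(0, 100), (100, 200), (200, 300)]
        # Slicing past the end of the list is safe, so the final chunk simply
        # carries the remaining 50 keys.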

        # TODO: This can be made more efficient by maintaining a constant number
        # of concurrent requests rather than waiting for each batch to complete.
        for start, end in chunk_indexes:
            parameters = (ValueSequence(row_keys_bytes[start:end]),
                          ValueSequence(column_names))
            try:
                batch_results = yield self.tornado_cassandra.execute(
                    query, parameters=parameters)
            except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
                message = 'Exception during batch_get_entity'
                logger.exception(message)
                raise AppScaleDBConnectionError(message)

            results.extend(list(batch_results))

        results_dict = {row_key: {} for row_key in row_keys}
        for (key, column, value) in results:
            if key not in results_dict:
                results_dict[key] = {}

            results_dict[key][column] = value

        raise gen.Return(results_dict)

    @gen.coroutine
    def batch_put_entity(self,
                         table_name,
                         row_keys,
                         column_names,
                         cell_values,
                         ttl=None):
        """
    Allows callers to store multiple rows with a single call. A row can
    have multiple columns and values with them. We refer to each row as
    an entity.

    Args:
      table_name: The table to mutate
      row_keys: A list of keys to store on
      column_names: A list of columns to mutate
      cell_values: A dict mapping row keys to column/value dicts
      ttl: The number of seconds to keep the row.
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the batch_put could not be performed due to
        an error with Cassandra.
    """
        if not isinstance(table_name, str):
            raise TypeError("Expected a str")
        if not isinstance(column_names, list):
            raise TypeError("Expected a list")
        if not isinstance(row_keys, list):
            raise TypeError("Expected a list")
        if not isinstance(cell_values, dict):
            raise TypeError("Expected a dict")

        insert_str = ('INSERT INTO "{table}" ({key}, {column}, {value}) '
                      'VALUES (?, ?, ?)').format(
                          table=table_name,
                          key=ThriftColumn.KEY,
                          column=ThriftColumn.COLUMN_NAME,
                          value=ThriftColumn.VALUE)

        if ttl is not None:
            insert_str += ' USING TTL {}'.format(ttl)

        statement = self.session.prepare(insert_str)

        statements_and_params = []
        for row_key in row_keys:
            for column in column_names:
                params = (bytearray(row_key), column,
                          bytearray(cell_values[row_key][column]))
                statements_and_params.append((statement, params))

        try:
            yield [
                self.tornado_cassandra.execute(statement, parameters=params)
                for statement, params in statements_and_params
            ]
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Exception during batch_put_entity'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)
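
    # A minimal usage sketch (hypothetical keys and columns, on a
    # DatastoreProxy instance 'proxy'; cell_values maps each row key to a
    # column/value dict, matching the loop above):
    #
    #   yield proxy.batch_put_entity(
    #     'app_entities', ['k1'], ['entity'],
    #     {'k1': {'entity': encoded_entity_pb}})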

    def prepare_insert(self, table):
        """ Prepare an insert statement.

    Args:
      table: A string containing the table name.
    Returns:
      A PreparedStatement object.
    """
        statement = ('INSERT INTO "{table}" ({key}, {column}, {value}) '
                     'VALUES (?, ?, ?) '
                     'USING TIMESTAMP ?').format(
                         table=table,
                         key=ThriftColumn.KEY,
                         column=ThriftColumn.COLUMN_NAME,
                         value=ThriftColumn.VALUE)

        if statement not in self.prepared_statements:
            self.prepared_statements[statement] = self.session.prepare(
                statement)

        return self.prepared_statements[statement]

    def prepare_delete(self, table):
        """ Prepare a delete statement.

    Args:
      table: A string containing the table name.
    Returns:
      A PreparedStatement object.
    """
        statement = ('DELETE FROM "{table}" '
                     'USING TIMESTAMP ? '
                     'WHERE {key} = ?').format(table=table,
                                               key=ThriftColumn.KEY)

        if statement not in self.prepared_statements:
            self.prepared_statements[statement] = self.session.prepare(
                statement)

        return self.prepared_statements[statement]
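
    # Both prepare_* helpers key their cache on the full CQL string, so each
    # table's insert and delete statements are prepared at most once per
    # session; a sketch of the effect:
    #
    #   stmt1 = proxy.prepare_insert('app_entities')
    #   stmt2 = proxy.prepare_insert('app_entities')
    #   assert stmt1 is stmt2  # served from self.prepared_statements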

    @gen.coroutine
    def normal_batch(self, mutations, txid):
        """ Use Cassandra's native batch statement to apply mutations atomically.

    Args:
      mutations: A list of dictionaries representing mutations.
      txid: An integer specifying a transaction ID.
    """
        self.logger.debug('Normal batch: {} mutations'.format(len(mutations)))
        batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                               retry_policy=BASIC_RETRIES)
        prepared_statements = {'insert': {}, 'delete': {}}
        for mutation in mutations:
            table = mutation['table']

            if table == 'group_updates':
                key = mutation['key']
                insert = ('INSERT INTO group_updates (group, last_update) '
                          'VALUES (%(group)s, %(last_update)s) '
                          'USING TIMESTAMP %(timestamp)s')
                parameters = {
                    'group': key,
                    'last_update': mutation['last_update'],
                    'timestamp': get_write_time(txid)
                }
                batch.add(insert, parameters)
                continue

            if mutation['operation'] == Operations.PUT:
                if table not in prepared_statements['insert']:
                    prepared_statements['insert'][table] = self.prepare_insert(
                        table)
                values = mutation['values']
                for column in values:
                    batch.add(
                        prepared_statements['insert'][table],
                        (bytearray(mutation['key']), column,
                         bytearray(values[column]), get_write_time(txid)))
            elif mutation['operation'] == Operations.DELETE:
                if table not in prepared_statements['delete']:
                    prepared_statements['delete'][table] = self.prepare_delete(
                        table)
                batch.add(prepared_statements['delete'][table],
                          (get_write_time(txid), bytearray(mutation['key'])))

        try:
            yield self.tornado_cassandra.execute(batch)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Unable to apply batch'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)
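
    # Unlike large_batch below, normal_batch leans on Cassandra's logged
    # BatchStatement for atomicity; a minimal usage sketch (mutations in the
    # same hypothetical format sketched above for statements_for_mutations):
    #
    #   yield proxy.normal_batch(mutations, txid)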

    def statements_for_mutations(self, mutations, txid):
        """ Generates Cassandra statements for a list of mutations.

    Args:
      mutations: A list of dictionaries representing mutations.
      txid: An integer specifying a transaction ID.
    Returns:
      A list of tuples containing Cassandra statements and parameters.
    """
        prepared_statements = {'insert': {}, 'delete': {}}
        statements_and_params = []
        for mutation in mutations:
            table = mutation['table']

            if table == 'group_updates':
                key = mutation['key']
                insert = ('INSERT INTO group_updates (group, last_update) '
                          'VALUES (%(group)s, %(last_update)s) '
                          'USING TIMESTAMP %(timestamp)s')
                parameters = {
                    'group': key,
                    'last_update': mutation['last_update'],
                    'timestamp': get_write_time(txid)
                }
                statements_and_params.append(
                    (SimpleStatement(insert), parameters))
                continue

            if mutation['operation'] == Operations.PUT:
                if table not in prepared_statements['insert']:
                    prepared_statements['insert'][table] = self.prepare_insert(
                        table)
                values = mutation['values']
                for column in values:
                    params = (bytearray(mutation['key']), column,
                              bytearray(values[column]), get_write_time(txid))
                    statements_and_params.append(
                        (prepared_statements['insert'][table], params))
            elif mutation['operation'] == Operations.DELETE:
                if table not in prepared_statements['delete']:
                    prepared_statements['delete'][table] = self.prepare_delete(
                        table)
                params = (get_write_time(txid), bytearray(mutation['key']))
                statements_and_params.append(
                    (prepared_statements['delete'][table], params))

        return statements_and_params

    @gen.coroutine
    def apply_mutations(self, mutations, txid):
        """ Apply mutations across tables.

    Args:
      mutations: A list of dictionaries representing mutations.
      txid: An integer specifying a transaction ID.
    """
        statements_and_params = self.statements_for_mutations(mutations, txid)
        yield [
            self.tornado_cassandra.execute(statement, parameters=params)
            for statement, params in statements_and_params
        ]

    @gen.coroutine
    def large_batch(self, app, mutations, entity_changes, txn):
        """ Insert or delete multiple rows across tables in an atomic statement.

    Args:
      app: A string containing the application ID.
      mutations: A list of dictionaries representing mutations.
      entity_changes: A list of changes at the entity level.
      txn: A transaction ID handler.
    Raises:
      BatchNotApplied if the batch cannot be started or the batch log cannot
        be written (a concurrent process may have modified the batch status).
      AppScaleDBConnectionError if a database connection error was encountered.
    """
        self.logger.debug('Large batch: transaction {}, {} mutations'.format(
            txn, len(mutations)))
        large_batch = LargeBatch(self.session, app, txn)
        try:
            yield large_batch.start()
        except FailedBatch as batch_error:
            raise BatchNotApplied(str(batch_error))

        insert_item = ('INSERT INTO batches (app, transaction, namespace, '
                       '                     path, old_value, new_value) '
                       'VALUES (?, ?, ?, ?, ?, ?)')
        insert_statement = self.session.prepare(insert_item)

        statements_and_params = []
        for entity_change in entity_changes:
            old_value = None
            if entity_change['old'] is not None:
                old_value = bytearray(entity_change['old'].Encode())
            new_value = None
            if entity_change['new'] is not None:
                new_value = bytearray(entity_change['new'].Encode())

            parameters = (app, txn, entity_change['key'].name_space(),
                          bytearray(entity_change['key'].path().Encode()),
                          old_value, new_value)
            statements_and_params.append((insert_statement, parameters))

        try:
            yield [
                self.tornado_cassandra.execute(statement, parameters=params)
                for statement, params in statements_and_params
            ]
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Unable to write large batch log'
            logger.exception(message)
            raise BatchNotApplied(message)

        # Since failing after this point is expensive and time consuming, retry
        # operations to make a failure less likely.
        custom_retry_coroutine = retry_raw_coroutine(
            backoff_threshold=5,
            retrying_timeout=10,
            retry_on_exception=dbconstants.TRANSIENT_CASSANDRA_ERRORS)

        persistent_apply_batch = custom_retry_coroutine(
            large_batch.set_applied)
        try:
            yield persistent_apply_batch()
        except FailedBatch as batch_error:
            raise AppScaleDBConnectionError(str(batch_error))

        persistent_apply_mutations = custom_retry_coroutine(
            self.apply_mutations)
        try:
            yield persistent_apply_mutations(mutations, txn)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Exception during large batch'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)

        try:
            yield large_batch.cleanup()
        except FailedBatch:
            # This should not raise an exception since the batch is already applied.
            logger.exception('Unable to clear batch status')

        clear_batch = ('DELETE FROM batches '
                       'WHERE app = %(app)s AND transaction = %(transaction)s')
        parameters = {'app': app, 'transaction': txn}
        try:
            yield self.tornado_cassandra.execute(clear_batch, parameters)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            logger.exception('Unable to clear batch log')

    @gen.coroutine
    def batch_delete(self, table_name, row_keys, column_names=()):
        """
    Remove a set of rows corresponding to a set of keys.

    Args:
      table_name: Table to delete rows from
      row_keys: A list of keys to remove
      column_names: Not used
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the batch_delete could not be performed due
        to an error with Cassandra.
    """
        if not isinstance(table_name, str): raise TypeError("Expected a str")
        if not isinstance(row_keys, list): raise TypeError("Expected a list")

        row_keys_bytes = [bytearray(row_key) for row_key in row_keys]

        statement = 'DELETE FROM "{table}" WHERE {key} IN %s'.\
          format(
            table=table_name,
            key=ThriftColumn.KEY
          )
        query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)
        parameters = (ValueSequence(row_keys_bytes), )

        try:
            yield self.tornado_cassandra.execute(query, parameters=parameters)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Exception during batch_delete'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)

    @gen.coroutine
    def delete_table(self, table_name):
        """
    Drops a given table (aka column family in Cassandra)

    Args:
      table_name: A string name of the table to drop
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the delete_table could not be performed due
        to an error with Cassandra.
    """
        if not isinstance(table_name, str): raise TypeError("Expected a str")

        statement = 'DROP TABLE IF EXISTS "{table}"'.format(table=table_name)
        query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)

        try:
            yield self.tornado_cassandra.execute(query)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Exception during delete_table'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)

    @gen.coroutine
    def create_table(self, table_name, column_names):
        """
    Creates a table if it doesn't already exist.

    Args:
      table_name: The column family name
      column_names: Not used but here to match the interface
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the create_table could not be performed due
        to an error with Cassandra.
    """
        if not isinstance(table_name, str): raise TypeError("Expected a str")
        if not isinstance(column_names, list):
            raise TypeError("Expected a list")

        statement = ('CREATE TABLE IF NOT EXISTS "{table}" ('
                     '{key} blob,'
                     '{column} text,'
                     '{value} blob,'
                     'PRIMARY KEY ({key}, {column})'
                     ') WITH COMPACT STORAGE').format(
                         table=table_name,
                         key=ThriftColumn.KEY,
                         column=ThriftColumn.COLUMN_NAME,
                         value=ThriftColumn.VALUE)
        query = SimpleStatement(statement, retry_policy=NO_RETRIES)

        try:
            yield self.tornado_cassandra.execute(query,
                                                 timeout=SCHEMA_CHANGE_TIMEOUT)
        except cassandra.OperationTimedOut:
            logger.warning(
                'Encountered an operation timeout while creating a table. Waiting {} '
                'seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT))
            time.sleep(SCHEMA_CHANGE_TIMEOUT)
            raise AppScaleDBConnectionError('Exception during create_table')
        except tuple(error for error in dbconstants.TRANSIENT_CASSANDRA_ERRORS
                     if error is not cassandra.OperationTimedOut):
            message = 'Exception during create_table'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)

    @gen.coroutine
    def range_query(self,
                    table_name,
                    column_names,
                    start_key,
                    end_key,
                    limit,
                    offset=0,
                    start_inclusive=True,
                    end_inclusive=True,
                    keys_only=False):
        """
    Gets a dense range ordered by keys. Returns an ordered list of
    dictionaries, [{key: {column1: value1, column2: value2}}, ...],
    or a list of keys if keys_only is set.

    Args:
      table_name: Name of table to access
      column_names: Columns which get returned within the key range
      start_key: String the query starts at
      end_key: String the query ends at
      limit: Maximum number of results to return
      offset: Number of leading results to skip ([offset:])
      start_inclusive: Boolean, whether results should include the start_key
      end_inclusive: Boolean, whether results should include the end_key
      keys_only: Boolean, whether to return only keys and not values
    Raises:
      TypeError: If an argument passed in was not of the expected type.
      AppScaleDBConnectionError: If the range_query could not be performed due
        to an error with Cassandra.
    Returns:
      An ordered list of dictionaries of key=>columns/values
    """
        if not isinstance(table_name, str):
            raise TypeError('table_name must be a string')
        if not isinstance(column_names, list):
            raise TypeError('column_names must be a list')
        if not isinstance(start_key, str):
            raise TypeError('start_key must be a string')
        if not isinstance(end_key, str):
            raise TypeError('end_key must be a string')
        if not isinstance(limit, (int, long)) and limit is not None:
            raise TypeError('limit must be int, long, or NoneType')
        if not isinstance(offset, (int, long)):
            raise TypeError('offset must be int or long')

        if start_inclusive:
            gt_compare = '>='
        else:
            gt_compare = '>'

        if end_inclusive:
            lt_compare = '<='
        else:
            lt_compare = '<'

        query_limit = ''
        if limit is not None:
            # Each key spans len(column_names) rows, so scale the CQL LIMIT
            # accordingly.
            query_limit = 'LIMIT {}'.format(len(column_names) * limit)

        statement = ('SELECT * FROM "{table}" WHERE '
                     'token({key}) {gt_compare} %s AND '
                     'token({key}) {lt_compare} %s AND '
                     '{column} IN %s '
                     '{limit} '
                     'ALLOW FILTERING').format(table=table_name,
                                               key=ThriftColumn.KEY,
                                               gt_compare=gt_compare,
                                               lt_compare=lt_compare,
                                               column=ThriftColumn.COLUMN_NAME,
                                               limit=query_limit)

        query = SimpleStatement(statement, retry_policy=BASIC_RETRIES)
        parameters = (bytearray(start_key), bytearray(end_key),
                      ValueSequence(column_names))

        try:
            results = yield self.tornado_cassandra.execute(
                query, parameters=parameters)

            results_list = []
            current_item = {}
            current_key = None
            for (key, column, value) in results:
                if keys_only:
                    results_list.append(key)
                    continue

                if key != current_key:
                    if current_item:
                        results_list.append({current_key: current_item})
                    current_item = {}
                    current_key = key

                current_item[column] = value
            if current_item:
                results_list.append({current_key: current_item})
            raise gen.Return(results_list[offset:])
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Exception during range_query'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)
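
    # Usage sketch (inside a coroutine; hypothetical table and keys — each
    # result is one dict per key, in key order):
    #
    #   rows = yield db.range_query('hypothetical_table', ['txnid'],
    #                               start_key='a', end_key='z', limit=100)
    #   # => [{'a!k1': {'txnid': ...}}, {'a!k2': {...}}, ...]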

    @gen.coroutine
    def get_metadata(self, key):
        """ Retrieve a value from the datastore metadata table.

    Args:
      key: A string containing the key to fetch.
    Returns:
      A string containing the value or None if the key is not present.
    """
        statement = ('SELECT {value} FROM "{table}" '
                     'WHERE {key} = %s '
                     'AND {column} = %s').format(
                         value=ThriftColumn.VALUE,
                         table=dbconstants.DATASTORE_METADATA_TABLE,
                         key=ThriftColumn.KEY,
                         column=ThriftColumn.COLUMN_NAME)
        try:
            results = yield self.tornado_cassandra.execute(
                statement, (bytearray(key), key))
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Unable to fetch {} from datastore metadata'.format(key)
            logger.exception(message)
            raise AppScaleDBConnectionError(message)

        try:
            raise gen.Return(results[0].value)
        except IndexError:
            # No matching row; the key is not present.
            return

    @gen.coroutine
    def set_metadata(self, key, value):
        """ Set a datastore metadata value.

    Args:
      key: A string containing the key to set.
      value: A string containing the value to set.
    """
        if not isinstance(key, str):
            raise TypeError('key should be a string')

        if not isinstance(value, str):
            raise TypeError('value should be a string')

        statement = ('INSERT INTO "{table}" ({key}, {column}, {value}) '
                     'VALUES (%(key)s, %(column)s, %(value)s)').format(
                         table=dbconstants.DATASTORE_METADATA_TABLE,
                         key=ThriftColumn.KEY,
                         column=ThriftColumn.COLUMN_NAME,
                         value=ThriftColumn.VALUE)
        parameters = {
            'key': bytearray(key),
            'column': key,
            'value': bytearray(value)
        }
        try:
            yield self.tornado_cassandra.execute(statement, parameters)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Unable to set datastore metadata for {}'.format(key)
            logger.exception(message)
            raise AppScaleDBConnectionError(message)
        except cassandra.InvalidRequest:
            yield self.create_table(dbconstants.DATASTORE_METADATA_TABLE,
                                    dbconstants.DATASTORE_METADATA_SCHEMA)
            yield self.tornado_cassandra.execute(statement, parameters)
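
    # Round-trip sketch (hypothetical key; note the fallback above: an
    # InvalidRequest from a missing metadata table triggers a lazy
    # create_table before retrying the insert):
    #
    #   yield db.set_metadata('hypothetical-key', '1.0')
    #   value = yield db.get_metadata('hypothetical-key')  # => '1.0'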

    @gen.coroutine
    def valid_data_version(self):
        """ Checks whether or not the data layout can be used.

    Returns:
      A boolean.
    """
        try:
            version = yield self.get_metadata(VERSION_INFO_KEY)
        except cassandra.InvalidRequest:
            raise gen.Return(False)

        is_expected_version = (version is not None
                               and float(version) == CURRENT_VERSION)
        raise gen.Return(is_expected_version)

    @gen.coroutine
    def group_updates(self, groups):
        """ Fetch the latest transaction IDs for each group.

    Args:
      groups: An iterable containing encoded Reference objects.
    Returns:
      A set of integers specifying transaction IDs.
    """
        query = 'SELECT * FROM group_updates WHERE group=%s'
        results = yield [
            self.tornado_cassandra.execute(query, [bytearray(group)])
            for group in groups
        ]
        updates = set(rows[0].last_update for rows in results if rows)
        raise gen.Return(updates)
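
    # Yielding a list of futures lets Tornado run the per-group queries
    # concurrently; a sketch with hypothetical encoded group keys:
    #
    #   txids = yield db.group_updates([group_key1, group_key2])
    #   # => set of the latest transaction ID recorded for each group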

    @gen.coroutine
    def start_transaction(self, app, txid, is_xg, in_progress):
        """ Persist transaction metadata.

    Args:
      app: A string containing an application ID.
      txid: An integer specifying the transaction ID.
      is_xg: A boolean specifying that the transaction is cross-group.
      in_progress: An iterable containing transaction IDs.
    """
        if in_progress:
            in_progress_bin = bytearray(
                struct.pack('q' * len(in_progress), *in_progress))
        else:
            in_progress_bin = None

        insert = (
            'INSERT INTO transactions (txid_hash, operation, namespace, path,'
            '                          start_time, is_xg, in_progress)'
            'VALUES (%(txid_hash)s, %(operation)s, %(namespace)s, %(path)s,'
            '        %(start_time)s, %(is_xg)s, %(in_progress)s)'
            'USING TTL {ttl}').format(ttl=dbconstants.MAX_TX_DURATION * 2)
        parameters = {
            'txid_hash': tx_partition(app, txid),
            'operation': TxnActions.START,
            'namespace': '',
            'path': bytearray(''),
            'start_time': datetime.datetime.utcnow(),
            'is_xg': is_xg,
            'in_progress': in_progress_bin
        }

        try:
            yield self.tornado_cassandra.execute(insert, parameters)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Exception while starting a transaction'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)
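
    # The in_progress column stores the IDs as a packed sequence of signed
    # 64-bit integers; a sketch of the encoding and its inverse (mirrored in
    # get_transaction_metadata below):
    #
    #   packed = struct.pack('q' * 3, 5, 6, 7)
    #   struct.unpack('q' * (len(packed) / 8), packed)  # => (5, 6, 7)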

    @gen.coroutine
    def put_entities_tx(self, app, txid, entities):
        """ Update transaction metadata with new put operations.

    Args:
      app: A string containing an application ID.
      txid: An integer specifying the transaction ID.
      entities: A list of entities that will be put upon commit.
    """
        batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                               retry_policy=BASIC_RETRIES)
        insert = self.session.prepare("""
      INSERT INTO transactions (txid_hash, operation, namespace, path, entity)
      VALUES (?, ?, ?, ?, ?)
      USING TTL {ttl}
    """.format(ttl=dbconstants.MAX_TX_DURATION * 2))

        for entity in entities:
            args = (tx_partition(app, txid), TxnActions.MUTATE,
                    entity.key().name_space(),
                    bytearray(entity.key().path().Encode()),
                    bytearray(entity.Encode()))
            batch.add(insert, args)

        try:
            yield self.tornado_cassandra.execute(batch)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Exception while putting entities in a transaction'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)
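
    # Usage sketch (hypothetical app and entities; each put becomes a MUTATE
    # row under the transaction's partition and is applied on commit):
    #
    #   yield db.put_entities_tx('hypothetical-app', txid, [entity1, entity2])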

    @gen.coroutine
    def delete_entities_tx(self, app, txid, entity_keys):
        """ Update transaction metadata with new delete operations.

    Args:
      app: A string containing an application ID.
      txid: An integer specifying the transaction ID.
      entity_keys: A list of entity keys that will be deleted upon commit.
    """
        batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                               retry_policy=BASIC_RETRIES)
        insert = self.session.prepare("""
      INSERT INTO transactions (txid_hash, operation, namespace, path, entity)
      VALUES (?, ?, ?, ?, ?)
      USING TTL {ttl}
    """.format(ttl=dbconstants.MAX_TX_DURATION * 2))

        for key in entity_keys:
            # The None value overwrites previous puts.
            args = (tx_partition(app, txid), TxnActions.MUTATE,
                    key.name_space(), bytearray(key.path().Encode()), None)
            batch.add(insert, args)

        try:
            yield self.tornado_cassandra.execute(batch)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Exception while deleting entities in a transaction'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)

    @gen.coroutine
    def transactional_tasks_count(self, app, txid):
        """ Count the number of existing tasks associated with the transaction.

    Args:
      app: A string specifying an application ID.
      txid: An integer specifying a transaction ID.
    Returns:
      An integer specifying the number of existing tasks.
    """
        select = ('SELECT count(*) FROM transactions '
                  'WHERE txid_hash = %(txid_hash)s '
                  'AND operation = %(operation)s')
        parameters = {
            'txid_hash': tx_partition(app, txid),
            'operation': TxnActions.ENQUEUE_TASK
        }
        try:
            result = yield self.tornado_cassandra.execute(select, parameters)
            raise gen.Return(result[0].count)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Exception while fetching task count'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)

    @gen.coroutine
    def add_transactional_tasks(self, app, txid, tasks, service_id,
                                version_id):
        """ Add tasks to be enqueued upon the completion of a transaction.

    Args:
      app: A string specifying an application ID.
      txid: An integer specifying a transaction ID.
      tasks: A list of TaskQueueAddRequest objects.
      service_id: A string specifying the client's service ID.
      version_id: A string specifying the client's version ID.
    """
        batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                               retry_policy=BASIC_RETRIES)
        query_str = (
            'INSERT INTO transactions (txid_hash, operation, namespace, path, task) '
            'VALUES (?, ?, ?, ?, ?) '
            'USING TTL {ttl}').format(ttl=dbconstants.MAX_TX_DURATION * 2)
        insert = self.session.prepare(query_str)

        for task in tasks:
            task.clear_transaction()

            # The path for the task entry doesn't matter as long as it's unique.
            path = bytearray(str(uuid.uuid4()))

            task_payload = '_'.join([service_id, version_id, task.Encode()])
            args = (tx_partition(app, txid), TxnActions.ENQUEUE_TASK, '', path,
                    task_payload)
            batch.add(insert, args)

        try:
            yield self.tornado_cassandra.execute(batch)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Exception while adding tasks in a transaction'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)
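
    # The payload joins service, version, and the encoded task with '_', and
    # get_transaction_metadata recovers them with split('_', 2), so the
    # service and version IDs must themselves be free of underscores:
    #
    #   payload = '_'.join(['default', 'v1', task.Encode()])
    #   service_id, version_id, task_pb = payload.split('_', 2)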

    @gen.coroutine
    def record_reads(self, app, txid, group_keys):
        """ Keep track of which entity groups were read in a transaction.

    Args:
      app: A string specifying an application ID.
      txid: An integer specifying a transaction ID.
      group_keys: An iterable containing Reference objects.
    """
        batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM,
                               retry_policy=BASIC_RETRIES)
        insert = self.session.prepare("""
      INSERT INTO transactions (txid_hash, operation, namespace, path)
      VALUES (?, ?, ?, ?)
      USING TTL {ttl}
    """.format(ttl=dbconstants.MAX_TX_DURATION * 2))

        for group_key in group_keys:
            if not isinstance(group_key, entity_pb.Reference):
                group_key = entity_pb.Reference(group_key)

            args = (tx_partition(app, txid), TxnActions.GET,
                    group_key.name_space(),
                    bytearray(group_key.path().Encode()))
            batch.add(insert, args)

        try:
            yield self.tornado_cassandra.execute(batch)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Exception while recording reads in a transaction'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)

    @gen.coroutine
    def get_transaction_metadata(self, app, txid):
        """ Fetch transaction state.

    Args:
      app: A string specifying an application ID.
      txid: An integer specifying a transaction ID.
    Returns:
      A dictionary containing transaction state.
    """
        select = (
            'SELECT namespace, operation, path, start_time, is_xg, in_progress, '
            '       entity, task '
            'FROM transactions '
            'WHERE txid_hash = %(txid_hash)s ')
        parameters = {'txid_hash': tx_partition(app, txid)}
        try:
            results = yield self.tornado_cassandra.execute(select, parameters)
        except dbconstants.TRANSIENT_CASSANDRA_ERRORS:
            message = 'Exception while fetching transaction metadata'
            logger.exception(message)
            raise AppScaleDBConnectionError(message)

        metadata = {'puts': {}, 'deletes': [], 'tasks': [], 'reads': set()}
        for result in results:
            if result.operation == TxnActions.START:
                metadata['start'] = result.start_time
                metadata['is_xg'] = result.is_xg
                # in_progress was written by start_transaction as a packed
                # sequence of 64-bit transaction IDs (or None).
                metadata['in_progress'] = set()
                if result.in_progress is not None:
                    metadata['in_progress'] = set(
                        struct.unpack('q' * int(len(result.in_progress) / 8),
                                      result.in_progress))
            if result.operation == TxnActions.MUTATE:
                key = create_key(app, result.namespace, result.path)
                if result.entity is None:
                    metadata['deletes'].append(key)
                else:
                    metadata['puts'][key.Encode()] = result.entity
            if result.operation == TxnActions.GET:
                group_key = create_key(app, result.namespace, result.path)
                metadata['reads'].add(group_key.Encode())
            if result.operation == TxnActions.ENQUEUE_TASK:
                service_id, version_id, task_pb = result.task.split('_', 2)
                task_metadata = {
                    'service_id': service_id,
                    'version_id': version_id,
                    'task': taskqueue_service_pb.TaskQueueAddRequest(task_pb)
                }
                metadata['tasks'].append(task_metadata)
        raise gen.Return(metadata)
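
    # Shape of the returned metadata, as assembled above (sketch):
    #
    #   {'start': datetime, 'is_xg': bool, 'in_progress': set of txids,
    #    'puts': {encoded_key: encoded_entity}, 'deletes': [keys],
    #    'reads': set of encoded group keys, 'tasks': [task_metadata dicts]}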
Exemplo n.º 20
0
class LargeBatch(object):
    def __init__(self, session, project, txid):
        """ Create a new LargeBatch object.

    Args:
      session: A cassandra-driver session.
      project: A string specifying a project ID.
      txid: An integer specifying a transaction ID.
    """
        self.session = session
        self.tornado_cassandra = TornadoCassandra(self.session)
        self.project = project
        self.txid = txid

        # Create an identifier so that it's possible to check if operations succeed
        # after a timeout.
        self.op_id = uuid.uuid4()

        # This value is used when claiming an existing failed batch.
        self.read_op_id = None

        # Indicates if the batch has been applied.
        self.applied = False

    @gen.coroutine
    def is_applied(self, retries=5):
        """ Fetch the status of the batch.

    Args:
      retries: The number of times to retry after failures.
    Returns:
      A boolean indicating whether or not the batch has been applied.
    Raises:
      BatchNotFound: If the batch cannot be found.
      BatchNotOwned: If a different process owns the batch.
    """
        if self.applied:
            raise gen.Return(True)

        get_status = """
      SELECT applied, op_id FROM batch_status
      WHERE txid_hash = %(txid_hash)s
    """
        query = SimpleStatement(get_status,
                                retry_policy=BASIC_RETRIES,
                                consistency_level=ConsistencyLevel.SERIAL)
        parameters = {'txid_hash': tx_partition(self.project, self.txid)}

        try:
            results = yield self.tornado_cassandra.execute(
                query, parameters=parameters)
            result = results[0]
            if result.op_id != self.op_id:
                self.read_op_id = result.op_id
                raise BatchNotOwned('{} does not match {}'.format(
                    self.op_id, result.op_id))
            raise gen.Return(result.applied)
        except TRANSIENT_CASSANDRA_ERRORS:
            retries_left = retries - 1
            if retries_left < 0:
                raise

            logger.debug('Unable to read batch status. Retrying.')
            is_applied = yield self.is_applied(retries=retries_left)
            raise gen.Return(is_applied)
        except IndexError:
            raise BatchNotFound('Batch for {}:{} not found'.format(
                self.project, self.txid))
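
    # The status row is read at SERIAL consistency so it observes the outcome
    # of the lightweight transactions issued by start() and set_applied().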

    @gen.coroutine
    def start(self, retries=5):
        """ Mark the batch as being in progress.

    Args:
      retries: The number of times to retry after failures.
    Raises:
      FailedBatch: If the batch cannot be marked as started.
    """
        if retries < 0:
            raise FailedBatch('Retries exhausted while starting batch')

        insert = SimpleStatement("""
      INSERT INTO batch_status (txid_hash, applied, op_id)
      VALUES (%(txid_hash)s, False, %(op_id)s)
      IF NOT EXISTS
    """,
                                 retry_policy=NO_RETRIES)
        parameters = {
            'txid_hash': tx_partition(self.project, self.txid),
            'op_id': self.op_id
        }

        try:
            result = yield self.tornado_cassandra.execute(insert, parameters)
        except TRANSIENT_CASSANDRA_ERRORS:
            yield self.start(retries=retries - 1)
            return

        if result.was_applied:
            return

        # Make sure this process was responsible for the insert.
        try:
            yield self.is_applied()
        except (BatchNotOwned, TRANSIENT_CASSANDRA_ERRORS) as batch_failure:
            raise FailedBatch(str(batch_failure))
        except BatchNotFound:
            yield self.start(retries=retries - 1)
            return
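
    # INSERT ... IF NOT EXISTS is a lightweight transaction: was_applied is
    # False when the row already exists, and is_applied() then checks op_id
    # to tell a retry by this process apart from another claimant.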

    @gen.coroutine
    def set_applied(self, retries=5):
        """ Mark the batch as being applied.

    Args:
      retries: The number of times to retry after failures.
    Raises:
      FailedBatch: If the batch cannot be marked as applied.
    """
        if retries < 0:
            raise FailedBatch('Retries exhausted while updating batch')

        update_status = SimpleStatement("""
      UPDATE batch_status
      SET applied = True
      WHERE txid_hash = %(txid_hash)s
      IF op_id = %(op_id)s
    """,
                                        retry_policy=NO_RETRIES)
        parameters = {
            'txid_hash': tx_partition(self.project, self.txid),
            'op_id': self.op_id
        }

        try:
            result = yield self.tornado_cassandra.execute(
                update_status, parameters)
            if result.was_applied:
                self.applied = True
                return
        except TRANSIENT_CASSANDRA_ERRORS:
            pass  # Application is confirmed below.

        try:
            self.applied = yield self.is_applied()
            if self.applied:
                return
            yield self.set_applied(retries=retries - 1)
            return
        except (BatchNotFound, BatchNotOwned,
                TRANSIENT_CASSANDRA_ERRORS) as error:
            raise FailedBatch(str(error))

    @gen.coroutine
    def cleanup(self, retries=5):
        """ Clean up the batch status entry.

    Args:
      retries: The number of times to retry after failures.
    Raises:
      FailedBatch: If the batch status entry cannot be deleted.
    """
        if retries < 0:
            raise FailedBatch('Retries exhausted while cleaning up batch')

        clear_status = SimpleStatement("""
      DELETE FROM batch_status
      WHERE txid_hash = %(txid_hash)s
      IF op_id = %(op_id)s
    """,
                                       retry_policy=NO_RETRIES)
        parameters = {
            'txid_hash': tx_partition(self.project, self.txid),
            'op_id': self.op_id
        }

        try:
            result = yield self.tornado_cassandra.execute(
                clear_status, parameters)
        except TRANSIENT_CASSANDRA_ERRORS:
            yield self.cleanup(retries=retries - 1)
            return

        if not result.was_applied:
            raise FailedBatch('Unable to clean up batch for {}:{}'.format(
                self.project, self.txid))
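

# A minimal end-to-end sketch of the LargeBatch lifecycle (hypothetical
# setup: assumes a reachable Cassandra node at 127.0.0.1 and the module's
# KEYSPACE with an existing batch_status table):
from cassandra.cluster import Cluster
from tornado import gen
from tornado.ioloop import IOLoop


@gen.coroutine
def apply_large_batch(session, project, txid):
    batch = LargeBatch(session, project, txid)
    yield batch.start()        # claim the batch via a lightweight transaction
    # ... perform the batch's mutations here ...
    yield batch.set_applied()  # readers now observe applied=True
    yield batch.cleanup()      # delete the status row


if __name__ == '__main__':
    session = Cluster(['127.0.0.1']).connect(KEYSPACE)
    IOLoop.current().run_sync(
        lambda: apply_large_batch(session, 'hypothetical-project', 42))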