Example #1
class CachedQueryMutator(object):
    """Utility to manipulate cached queries with batching.

    This implements the context manager protocol so it can be used with the
    with statement for clean batches.

    """

    def __init__(self):
        self.mutator = Mutator(CONNECTION_POOL)
        self.to_prune = set()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.send()

    def insert(self, query, things):
        """Insert items into the given cached query.

        If the items are already in the query, they will have their sorts
        updated.

        This will sometimes trigger pruning with a configurable probability
        (see g.querycache_prune_chance).

        """
        if not things:
            return

        LOG.debug("Inserting %r into query %r", things, query)

        assert not query.is_precomputed
        query._insert(self.mutator, things)

        if (random.random() / len(things)) < PRUNE_CHANCE:
            self.to_prune.add(query)

    def delete(self, query, things):
        """Remove things from the query."""
        if not things:
            return

        LOG.debug("Deleting %r from query %r", things, query)

        query._delete(self.mutator, things)

    def send(self):
        """Commit the mutations batched up so far and potentially do pruning.

        This is automatically called by __exit__ when used as a context
        manager.

        """
        self.mutator.send()

        if self.to_prune:
            LOG.debug("Pruning queries %r", self.to_prune)
            CachedQuery._prune_multi(self.to_prune)
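A minimal usage sketch for the class above; my_query, new_things, and stale_things are placeholder names, and my_query is assumed to be a non-precomputed CachedQuery:

# Hypothetical usage: __exit__ calls send(), so the whole batch is
# committed (and pruning possibly triggered) when the block ends.
with CachedQueryMutator() as m:
    m.insert(my_query, new_things)
    m.delete(my_query, stale_things)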
Example #2
class CachedQueryMutator(object):
    def __init__(self):
        self.mutator = Mutator(CONNECTION_POOL)
        self.to_prune = set()

    def __enter__(self):
        self.mutator.__enter__()
        return self

    def __exit__(self, type, value, traceback):
        # delegate to the underlying Mutator so the batch is sent (or
        # discarded on error) before any pruning happens
        self.mutator.__exit__(type, value, traceback)

        if self.to_prune:
            CachedQuery._prune_multi(self.to_prune)

    def insert(self, query, things):
        if not things:
            return

        query._insert(self.mutator, things)

        if (random.random() / len(things)) < PRUNE_CHANCE:
            self.to_prune.add(query)

    def delete(self, query, things):
        if not things:
            return

        query._delete(self.mutator, things)
Example #3
def set_account_ip(account_id, ip, date=None):
    """Set an IP address as having accessed an account.

    Updates all underlying datastores.
    """
    if date is None:
        date = datetime.datetime.now(g.tz)
    m = Mutator(CONNECTION_POOL)
    m.insert(IPsByAccount._cf, str(account_id), {date: ip}, ttl=CF_TTL)
    m.insert(AccountsByIP._cf, ip, {date: str(account_id)}, ttl=CF_TTL)
    m.send()
Example #4
def set_account_ip(account_id, ip, date=None):
    """Set an IP address as having accessed an account.

    Updates all underlying datastores.
    """
    # don't store private IPs, send a graphite event so we can alert on this
    if ip_address(ip).is_private:
        g.stats.simple_event('ip.private_ip_storage_prevented')
        return

    if date is None:
        date = datetime.datetime.now(g.tz)
    m = Mutator(CONNECTION_POOL)
    m.insert(IPsByAccount._cf, str(account_id), {date: ip}, ttl=CF_TTL)
    m.insert(AccountsByIP._cf, ip, {date: str(account_id)}, ttl=CF_TTL)
    m.send()
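A read-side sketch for the two-way index written above; it assumes IPsByAccount._cf is a plain pycassa ColumnFamily keyed by str(account_id) with date-ordered columns, which is how set_account_ip writes it:

# Fetch the ten most recent IPs seen for an account (newest first).
recent = IPsByAccount._cf.get(str(account_id),
                              column_reversed=True,
                              column_count=10)
for date, ip in recent.items():
    print date, ip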
Example #5
    @classmethod
    def _prune_multi(cls, queries):
        cls._fetch_multi(queries)

        with Mutator(CONNECTION_POOL) as m:
            for q in queries:
                q._sort_data()
                q._prune(m)
Example #6
    def save_log(self, application, host, severity, timestamp, message):
        """
        Saves a log message.
        Raises:
        - DaedalusException if any parameter isn't valid.
        """
        _check_application(application)
        _check_severity(severity)
        _check_host(host)
        _check_message(message)
        try:
            timestamp = float(timestamp)
        except:
            raise (DaedalusException(
                "The timestamp '{0}' couldn't be transformed to a float".
                format(timestamp)))

        event_uuid = convert_time_to_uuid(timestamp, randomize=True)
        _id = event_uuid.get_hex()

        json_message = json.dumps({
            'application': application,
            'host': host,
            'severity': severity,
            'timestamp': timestamp,
            '_id': _id,
            'message': message,
        })

        pool = self._get_pool()
        with Mutator(pool) as batch:
            # Save on <CF> CF_LOGS
            row_key = ymd_from_uuid1(event_uuid)
            batch.insert(self._get_cf_logs(), str(row_key), {
                event_uuid: json_message,
            })

            # Save on <CF> CF_LOGS_BY_APP
            batch.insert(self._get_cf_logs_by_app(), application, {
                event_uuid: EMPTY_VALUE,
            })

            # Save on <CF> CF_LOGS_BY_HOST
            batch.insert(self._get_cf_logs_by_host(), host, {
                event_uuid: EMPTY_VALUE,
            })

            # Save on <CF> CF_LOGS_BY_SEVERITY
            batch.insert(self._get_cf_logs_by_severity(), severity, {
                event_uuid: EMPTY_VALUE,
            })
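A hedged caller sketch for save_log; the StorageService name and its zero-argument constructor are assumptions for illustration, only save_log's signature comes from the example above:

import time

service = StorageService()  # hypothetical wrapper exposing save_log
service.save_log(application='billing',
                 host='web-01',
                 severity='ERROR',
                 timestamp=time.time(),
                 message='payment gateway timed out')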
Example #7
class CachedQueryMutator(object):
    def __init__(self):
        self.mutator = Mutator(CONNECTION_POOL)
        self.to_prune = set()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.send()

    def insert(self, query, things):
        if not things:
            return

        LOG.debug("Inserting %r into query %r", things, query)

        query._insert(self.mutator, things)

        if (random.random() / len(things)) < PRUNE_CHANCE:
            self.to_prune.add(query)

    def delete(self, query, things):
        if not things:
            return

        LOG.debug("Deleting %r from query %r", things, query)

        query._delete(self.mutator, things)

    def send(self):
        self.mutator.send()

        if self.to_prune:
            LOG.debug("Pruning queries %r", self.to_prune)
            CachedQuery._prune_multi(self.to_prune)
Example #8
    def start_batch(self, queue_size=0):
        if self._batch is None:
            self.in_batch = True
            self._batch = Mutator(self._pool, queue_size)
        self.batch_count += 1
Example #9
    def __init__(self):
        self.mutator = Mutator(CONNECTION_POOL)
        self.to_prune = set()
Example #10
class CassandraDataStore(Delegate):
    def __init__(self,
                 keyspace='agamemnon',
                 server_list=['localhost:9160'],
                 replication_factor=1,
                 create_keyspace=False,
                 **kwargs):
        super(CassandraDataStore, self).__init__()

        self._keyspace = keyspace
        self._server_list = server_list
        self._replication_factor = replication_factor
        self._pool_args = kwargs

        self._system_manager = pycassa.system_manager.SystemManager(
            server_list[0])
        if create_keyspace:
            self.create()
        else:
            self.init_pool()

    def init_pool(self):
        self._pool = pycassa.pool.ConnectionPool(self._keyspace,
                                                 self._server_list,
                                                 **self._pool_args)

        self._cf_cache = {}
        self._index_cache = {}
        self._batch = None
        self.in_batch = False
        self.batch_count = 0
        if not self.cf_exists(OUTBOUND_RELATIONSHIP_CF):
            self.create_cf(OUTBOUND_RELATIONSHIP_CF, super=True)
        if not self.cf_exists(INBOUND_RELATIONSHIP_CF):
            self.create_cf(INBOUND_RELATIONSHIP_CF, super=True)
        if not self.cf_exists(RELATIONSHIP_INDEX):
            self.create_cf(RELATIONSHIP_INDEX, super=True)
        if not self.cf_exists(RELATIONSHIP_CF):
            self.create_cf(RELATIONSHIP_CF, super=False)

    @property
    def system_manager(self):
        return self._system_manager

    @property
    def keyspace(self):
        return self._keyspace

    def create(self):
        if self._keyspace not in self._system_manager.list_keyspaces():
            strategy_options = {
                'replication_factor': str(self._replication_factor)
            }
            self._system_manager.create_keyspace(
                self._keyspace, strategy_options=strategy_options)
        self.init_pool()

    def drop(self):
        self._system_manager.drop_keyspace(self._keyspace)
        self._pool.dispose()
        self._pool = None

    def truncate(self):
        try:
            self.drop()
        except InvalidRequestException:
            pass
        self.create()
        self.init_pool()

    def get_count(self,
                  type,
                  row,
                  columns=None,
                  column_start=None,
                  super_column=None,
                  column_finish=None):
        args = {}
        if columns is not None:
            args['columns'] = columns
        if column_start is not None:
            args['column_start'] = column_start
        if column_finish is not None:
            args['column_finish'] = column_finish
        if super_column is not None:
            args['super_column'] = super_column
        return self.get_cf(type).get_count(row, **args)

    def create_cf(self,
                  type,
                  column_type=pycassa.system_manager.ASCII_TYPE,
                  super=False,
                  index_columns=list()):
        self._system_manager.create_column_family(self._keyspace,
                                                  type,
                                                  super=super,
                                                  comparator_type=column_type)
        for column in index_columns:
            self.create_secondary_index(type, column, column_type)
        return cf.ColumnFamily(self._pool,
                               type,
                               autopack_names=False,
                               autopack_values=False)

    def create_secondary_index(self,
                               type,
                               column,
                               column_type=pycassa.system_manager.ASCII_TYPE):
        self._system_manager.create_index(self._keyspace,
                                          type,
                                          column,
                                          column_type,
                                          index_name='%s_%s_index' %
                                          (type, column))

    def cf_exists(self, type):
        if type in self._cf_cache:
            return True
        try:
            cf.ColumnFamily(self._pool,
                            type,
                            autopack_names=False,
                            autopack_values=False)
        except NotFoundException:
            return False
        return True

    def get_cf(self, type, create=True):

        column_family = None
        if type in self._cf_cache:
            return self._cf_cache[type]
        try:
            column_family = cf.ColumnFamily(self._pool,
                                            type,
                                            autopack_names=False,
                                            autopack_values=False)
            self._cf_cache[type] = column_family
        except NotFoundException:
            if create:
                column_family = self.create_cf(type)
        return column_family

    def insert(self, column_family, key, columns):
        if self._batch is not None:
            self._batch.insert(column_family, key, columns)
        else:
            with Mutator(self._pool) as b:
                b.insert(column_family, key, columns)

    def remove(self, column_family, key, columns=None, super_column=None):
        if self._batch is not None:
            self._batch.remove(column_family,
                               key,
                               columns=columns,
                               super_column=super_column)
        else:
            column_family.remove(key,
                                 columns=columns,
                                 super_column=super_column)

    def start_batch(self, queue_size=0):
        if self._batch is None:
            self.in_batch = True
            self._batch = Mutator(self._pool, queue_size)
        self.batch_count += 1

    def commit_batch(self):
        self.batch_count -= 1
        if not self.batch_count:
            self._batch.send()
            self._batch = None
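The batch methods above are reference-counted: nested start_batch calls share one Mutator, and only the outermost commit_batch sends it. A sketch of that behavior, with store and logs_cf as placeholders:

store.start_batch()
store.start_batch()                 # nested: batch_count == 2, same Mutator
store.insert(logs_cf, 'row-key', {'col': 'value'})  # queued, not sent
store.commit_batch()                # batch_count == 1, nothing sent yet
store.commit_batch()                # batch_count == 0, Mutator.send() fires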
Example #11
class CachedQueryMutator(object):
    """Utility to manipulate cached queries with batching.

    This implements the context manager protocol so it can be used with the
    with statement for clean batches.

    """

    def __init__(self):
        self.mutator = Mutator(CONNECTION_POOL)
        self.to_prune = set()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.send()

    def insert(self, query, things):
        """Insert items into the given cached query.

        If the items are already in the query, they will have their sorts
        updated.

        This will sometimes trigger pruning with a configurable probability
        (see g.querycache_prune_chance).

        """
        if not things:
            return

        LOG.debug("Inserting %r into query %r", things, query)

        assert not query.is_precomputed
        query._insert(self.mutator, things)

        if (random.random() / len(things)) < PRUNE_CHANCE:
            self.to_prune.add(query)

    def replace(self, query, things, ttl=None):
        """Replace a precomputed query with a new set of things.

        The query index will be updated. If a TTL is specified, it will be
        applied to all columns generated by this action allowing old
        precomputed queries to fall away after they're no longer useful.

        """
        assert query.is_precomputed

        if isinstance(ttl, datetime.timedelta):
            ttl = ttl.total_seconds()

        query._replace(self.mutator, things, ttl)

    def delete(self, query, things):
        """Remove things from the query."""
        if not things:
            return

        LOG.debug("Deleting %r from query %r", things, query)

        query._delete(self.mutator, things)

    def send(self):
        """Commit the mutations batched up so far and potentially do pruning.

        This is automatically called by __exit__ when used as a context
        manager.

        """
        self.mutator.send()

        if self.to_prune:
            LOG.debug("Pruning queries %r", self.to_prune)
            CachedQuery._prune_multi(self.to_prune)
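A sketch of the replace() path above; precomputed_query and items are placeholders, and replace() itself converts the timedelta to seconds before applying it as a column TTL:

# Hypothetical usage: old precomputed rows expire after three days.
with CachedQueryMutator() as m:
    m.replace(precomputed_query, items, ttl=datetime.timedelta(days=3))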
Example #12
    def update(self):
        things = list(self.query)

        with Mutator(CONNECTION_POOL) as m:
            self.model.remove(m, self.key, None)  # empty the whole row
            self._insert(m, things)
Example #13
#!/usr/bin/python

#
# PyCassa test
#

from cubichyperloglog import CubicHyperLogLogCassandra

from pycassa.pool import ConnectionPool
from pycassa.columnfamily import ColumnFamily
from pycassa.batch import Mutator

pool = ConnectionPool("test", ["localhost:9160"])
cf = ColumnFamily(pool, "hll")
mut = Mutator(pool, 5000)  # queue up to 5000 mutations before auto-sending

test_cardinalities = [1, 2, 3, 4, 5]
test_cardinalities_multiplier = 1000

line = "-" * 62

print line
print "| %5s | %10s | %10s | %10s | %10s  |" % ("bits", "card", "estim", "diff", "diff")
print line

for card in test_cardinalities:
    x = CubicHyperLogLogCassandra(cf, "my_counter_test", 9, mutator=mut)

    x.clear()

    for i in range(card):
Example #14
class CassandraDataStore(Delegate):
    def __init__(self,
                 keyspace='agamemnon',
                 server_list=['localhost:9160'],
                 replication_factor=1,
                 default_consistency_level=ConsistencyLevel.QUORUM,
                 create_keyspace=False,
                 **kwargs):
        super(CassandraDataStore, self).__init__()

        self._keyspace = keyspace
        self._server_list = server_list
        self._replication_factor = replication_factor
        self._consistency_level = default_consistency_level
        self._pool_args = kwargs

        if create_keyspace:
            self.create()
        else:
            self.init_pool()

    def init_pool(self):
        self._pool = pycassa.pool.ConnectionPool(self._keyspace,
                                                 self._server_list,
                                                 **self._pool_args)

        self._cf_cache = {}
        self._index_cache = {}
        self._batch = None
        self.in_batch = False
        self.batch_count = 0
        if not self.cf_exists(OUTBOUND_RELATIONSHIP_CF):
            self.create_cf(OUTBOUND_RELATIONSHIP_CF, super=True)
        if not self.cf_exists(INBOUND_RELATIONSHIP_CF):
            self.create_cf(INBOUND_RELATIONSHIP_CF, super=True)
        if not self.cf_exists(RELATIONSHIP_INDEX):
            self.create_cf(RELATIONSHIP_INDEX, super=True)
        if not self.cf_exists(RELATIONSHIP_CF):
            self.create_cf(RELATIONSHIP_CF, super=False)

    @property
    def system_manager(self):
        for server in self._server_list:
            try:
                return pycassa.system_manager.SystemManager(server)
            except TTransportException:
                log.warning("Could not connect to Cassandra server {0}".format(server))
        raise CassandraClusterNotFoundException(
            "Could not connect to any Cassandra server in list")

    @property
    def keyspace(self):
        return self._keyspace

    def create(self):
        if self._keyspace not in self.system_manager.list_keyspaces():
            strategy_options = {'replication_factor': str(self._replication_factor)}
            self.system_manager.create_keyspace(self._keyspace,
                                                strategy_options=strategy_options)
        self.init_pool()

    def drop(self):
        self.system_manager.drop_keyspace(self._keyspace)
        self._pool.dispose()
        self._pool = None

    def truncate(self):
        try:
            self.drop()
        except InvalidRequestException:
            pass
        self.create()
        self.init_pool()

    def get_count(self, type, row, columns=None, column_start=None, super_column=None, column_finish=None):
        args = {}
        if columns is not None:
            args['columns'] = columns
        if column_start is not None:
            args['column_start'] = column_start
        if column_finish is not None:
            args['column_finish'] = column_finish
        if super_column is not None:
            args['super_column'] = super_column
        return self.get_cf(type).get_count(row, **args)

    def create_cf(self, type, column_type=pycassa.system_manager.ASCII_TYPE, super=False, index_columns=list()):
        self.system_manager.create_column_family(self._keyspace, type, super=super, comparator_type=column_type)
        for column in index_columns:
            self.create_secondary_index(type, column, column_type)
        return cf.ColumnFamily(self._pool, type, autopack_names=False, autopack_values=False,
                               read_consistency_level=self._consistency_level,
                               write_consistency_level=self._consistency_level)

    def create_secondary_index(self, type, column, column_type=pycassa.system_manager.ASCII_TYPE):
        self.system_manager.create_index(self._keyspace, type, column, column_type,
                                         index_name='%s_%s_index' % (type, column))

    def cf_exists(self, type):
        if type in self._cf_cache:
            return True
        try:
            cf.ColumnFamily(self._pool, type, autopack_names=False, autopack_values=False,
                            read_consistency_level=self._consistency_level,
                            write_consistency_level=self._consistency_level)
        except NotFoundException:
            return False
        return True

    def get_cf(self, type, create=True):

        column_family = None
        if type in self._cf_cache:
            return self._cf_cache[type]
        try:
            column_family = cf.ColumnFamily(self._pool, type, autopack_names=False, autopack_values=False,
                                            read_consistency_level=self._consistency_level,
                                            write_consistency_level=self._consistency_level)
            self._cf_cache[type] = column_family
        except NotFoundException:
            if create:
                column_family = self.create_cf(type)
        return column_family


    def insert(self, column_family, key, columns):
        if self._batch is not None:
            self._batch.insert(column_family, key, columns)
        else:
            with Mutator(self._pool) as b:
                b.insert(column_family, key, columns)

    def remove(self, column_family, key, columns=None, super_column=None):
        if self._batch is not None:
            self._batch.remove(column_family, key, columns=columns, super_column=super_column)
        else:
            column_family.remove(key, columns=columns, super_column=super_column)

    def start_batch(self, queue_size=0):
        if self._batch is None:
            self.in_batch = True
            self._batch = Mutator(self._pool, queue_size)
        self.batch_count += 1

    def commit_batch(self):
        self.batch_count -= 1
        if not self.batch_count:
            self._batch.send()
            self._batch = None
Example #15
    def start_batch(self):
        if self._batch is None:
            self.in_batch = True
            self._batch = Mutator(self._pool, 0)
        self.batch_count += 1
Example #16
class CassandraDataStore(object):
    def __init__(self, keyspace, pool, system_manager):
        self._cf_cache = {}
        self._index_cache = {}
        self._system_manager = system_manager
        self._pool = pool
        self._keyspace = keyspace
        self._batch = None
        self.in_batch = False
        self.batch_count = 0
        if not self.cf_exists(OUTBOUND_RELATIONSHIP_CF):
            self.create_cf(OUTBOUND_RELATIONSHIP_CF, super=True)
        if not self.cf_exists(INBOUND_RELATIONSHIP_CF):
            self.create_cf(INBOUND_RELATIONSHIP_CF, super=True)
        if not self.cf_exists(RELATIONSHIP_INDEX):
            self.create_cf(RELATIONSHIP_INDEX, super=True)

    def get_count(self, type, row, columns=None, column_start=None, super_column=None, column_finish=None):
        args = {}
        if columns is not None:
            args['columns'] = columns
        if column_start is not None:
            args['column_start'] = column_start
        if column_finish is not None:
            args['column_finish'] = column_finish
        if super_column is not None:
            args['super_column'] = super_column
        return self.get_cf(type).get_count(row, **args)

    def create_cf(self, type, column_type=system_manager.ASCII_TYPE, super=False, index_columns=list()):
        self._system_manager.create_column_family(self._keyspace, type, super=super, comparator_type=column_type)
        for column in index_columns:
            self._system_manager.create_index(self._keyspace, type, column, column_type,
                                              index_name='%s_%s_index' % (type, column))
        return cf.ColumnFamily(self._pool, type, autopack_names=False, autopack_values=False)
    
    def cf_exists(self, type):
        if type in self._cf_cache:
            return True
        try:
            cf.ColumnFamily(self._pool, type, autopack_names=False, autopack_values=False)
        except NotFoundException:
            return False
        return True

    def get_cf(self, type, create=True):

        column_family = None
        if type in self._cf_cache:
            return self._cf_cache[type]
        try:
            column_family = cf.ColumnFamily(self._pool, type, autopack_names=False, autopack_values=False)
            self._cf_cache[type] = column_family
        except NotFoundException:
            if create:
                column_family = self.create_cf(type)
        return column_family



    def insert(self, column_family, key, columns):
        if self._batch is not None:
            self._batch.insert(column_family, key, columns)
        else:
            # no batch in progress: fall back to a one-off Mutator
            with Mutator(self._pool) as b:
                b.insert(column_family, key, columns)

    def remove(self, column_family, key, columns=None, super_column=None):
        if self._batch is not None:
            self._batch.remove(column_family, key, columns=columns, super_column=super_column)
        else:
            column_family.remove(key, columns=columns, super_column=super_column)

    def start_batch(self):
        if self._batch is None:
            self.in_batch = True
            self._batch = Mutator(self._pool, 0)
        self.batch_count += 1

    def commit_batch(self):
        self.batch_count -= 1
        if not self.batch_count:
            self._batch.send()
            self._batch = None
Example #17
    def parse_logs(self, build_ids):
        """Parse the logs for the specified build IDs into storage."""
        # TODO hook up parallel processing.

        OUR_VERSION = '1'
        mut = Mutator(self._pool)
        cf = ColumnFamily(self._pool, 'build_timelines')
        i_cf = ColumnFamily(self._pool, 'indices')
        builds_cf = ColumnFamily(self._pool, 'builds')
        counters = ColumnFamily(self._pool, 'counters')
        super_counters = ColumnFamily(self._pool, 'super_counters')

        for build_id in build_ids:
            info = self._connection.build_from_id(build_id)
            if not info:
                continue

            existing_version = info.get('log_parsing_version')
            if existing_version and existing_version >= OUR_VERSION:
                continue

            if info['log_fetch_status'] != 'fetched':
                continue

            log = self._connection.file_data(info['log_url'])
            if not log:
                continue

            parsed = parse_build_log(log)
            cat = info['builder_category']

            cols = {}
            indices = {}

            for step in parsed.steps:
                start = calendar.timegm(step.start.utctimetuple())
                end = calendar.timegm(step.end.utctimetuple())

                elapsed = end - start
                name = step.name

                cols[start] = {
                    'name': name,
                    'state': step.state,
                    'results': step.results,
                    'start': unicode(start),
                    'end': unicode(end),
                    'elapsed': unicode(elapsed)
                }

                start_date = step.start.date().isoformat()

                indices[name] = {build_id: ''}
                counters.add('build_step_number', name)
                counters.add('build_step_duration', name, elapsed)
                super_counters.add('build_step_number_by_category', name,
                    1, cat)
                super_counters.add('build_step_duration_by_category', name,
                    elapsed, cat)
                super_counters.add('build_step_number_by_day', name, 1,
                    start_date)
                super_counters.add('build_step_duration_by_day', name,
                    elapsed, start_date)

                day_cat = '%s.%s' % (start_date, cat)
                super_counters.add('build_step_number_by_day_and_category',
                    name, 1, day_cat)
                super_counters.add('build_step_duration_by_day_and_category',
                    name, elapsed, day_cat)

            mut.insert(cf, build_id, cols)
            mut.insert(i_cf, 'build_step_name_to_build_ids', indices)
            mut.insert(builds_cf, build_id, {'log_parsing_version': OUR_VERSION})

            yield 'Parsed build %s into %d steps.' % (build_id,
                len(parsed.steps))

        mut.send()
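Note that parse_logs is a generator, so parsing only happens as the caller iterates; a minimal consumption sketch with store and build_ids as placeholders:

for status in store.parse_logs(build_ids):
    print status  # e.g. 'Parsed build ... into N steps.'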