Exemplo n.º 1
0
class CommentTreeStorageV2(CommentTreeStorageBase):
    """Cassandra column-based storage for comment trees.

    Under this implementation, each column in a link's row corresponds to a
    comment on that link. The column name is an encoding of the tuple of
    (depth, comment.parent_id, comment._id), and the value is a counter giving
    the size of the subtree rooted at the comment (the comment itself
    included).

    Key features:
        - does not use permacache!
        - does not require locking for updates
    """

    __metaclass__ = tdb_cassandra.ThingMeta
    _connection_pool = 'main'
    _use_db = True

    _type_prefix = None
    _cf_name = 'CommentTree'

    # column keys are tuples of (depth, parent_id, comment_id)
    _compare_with = types.CompositeType(types.IntegerType(),
                                        types.IntegerType(),
                                        types.IntegerType())

    # column values are counters
    _extra_schema_creation_args = {
        'default_validation_class': COUNTER_COLUMN_TYPE,
        'replicate_on_write': True,
    }

    # number of columns requested per read / written per mutation
    COLUMN_READ_BATCH_SIZE = tdb_cassandra.max_column_count
    COLUMN_WRITE_BATCH_SIZE = 1000

    @staticmethod
    def _key(link):
        """Return the row key for `link`.

        The key is the base-36 link id, suffixed with ':<revision>' (also
        base-36) when the link has a non-zero `comment_tree_id`.
        """
        revision = getattr(link, 'comment_tree_id', 0)
        if revision:
            return '%s:%s' % (utils.to36(link._id), utils.to36(revision))
        else:
            return utils.to36(link._id)

    @staticmethod
    def _column_to_obj(cols):
        """Yield ((depth, parent_id, comment_id), value) for every column.

        `cols` is an iterable of dicts keyed by (depth, pid, cid) tuples. A
        parent id of -1 is reported as None.
        """
        for col in cols:
            for (depth, pid, cid), val in col.iteritems():
                yield (depth, None if pid == -1 else pid, cid), val

    @classmethod
    def by_link(cls, link):
        """Load the stored tree for `link` as a dict of tree structures.

        A link with no stored row yields the same structures, all empty.
        """
        try:
            row = cls.get_row(cls._key(link))
        except ttypes.NotFoundException:
            row = []
        return cls._from_row(row)

    @classmethod
    def get_row(cls, key):
        """Fetch every column of row `key`, paging in read-sized batches.

        Returns a list of ((depth, parent_id, comment_id), value) pairs.

        Raises ttypes.NotFoundException only when the first fetch finds
        nothing, i.e. when no columns at all were read for `key`.
        """
        row = []
        size = 0
        column_start = ''
        while True:
            try:
                columns = cls._cf.get(key,
                                      column_count=cls.COLUMN_READ_BATCH_SIZE,
                                      column_start=column_start)
            except ttypes.NotFoundException:
                # A full final page forces one extra fetch that finds no
                # columns. That is the end of the row, not a missing row, so
                # keep what has been read instead of letting the exception
                # discard it in the caller.
                if not row:
                    raise
                break
            row.extend(columns.iteritems())
            num_fetched = len(row) - size
            size = len(row)
            if num_fetched < cls.COLUMN_READ_BATCH_SIZE:
                break
            # resume just past the last column seen
            depth, pid, cid = row[-1][0]
            column_start = (depth, pid if pid is not None else -1, cid + 1)
        return row

    @classmethod
    def _from_row(cls, row):
        """Build the tree structures from a fetched row.

        `row` is an iterable of ((depth, parent_id, comment_id), subtree_size)
        pairs. Returns a dict with keys `cids`, `tree` (parent id -> list of
        child ids), `depth`, `num_children` and `parents`. Top-level comments
        are filed under a parent of None.
        """
        cids = []
        tree = {}
        depth = {}
        parents = {}
        num_children = {}
        for (d, pid, cid), val in row:
            if cid == -1:
                continue
            if pid == -1:
                pid = None
            cids.append(cid)
            tree.setdefault(pid, []).append(cid)
            depth[cid] = d
            parents[cid] = pid
            # the counter includes the comment itself
            num_children[cid] = val - 1
        return dict(cids=cids,
                    tree=tree,
                    depth=depth,
                    num_children=num_children,
                    parents=parents)

    @classmethod
    @tdb_cassandra.will_write
    def rebuild(cls, tree, comments):
        """Discard the stored tree for `tree.link` and rebuild it.

        The current row is removed and the link's `comment_tree_id` is
        incremented, so `_key` produces a new row key for the rebuilt tree.
        Any comment that has a parent but no `parents` path gets one computed
        from `comments` and is committed before the comments are re-added.
        """
        with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
            g.log.debug('removing tree from %s', cls._key(tree.link))
            m.remove(cls._cf, cls._key(tree.link))
        tree.link._incr('comment_tree_id')
        g.log.debug('link %s comment tree revision bumped up to %s',
                    tree.link._fullname, tree.link.comment_tree_id)

        # make sure all comments have parents attribute filled in
        parents = {c._id: c.parent_id for c in comments}
        for c in comments:
            if c.parent_id and c.parents is None:
                # walk up to the root, collecting ancestor ids root-first
                path = []
                pid = c.parent_id
                while pid:
                    path.insert(0, pid)
                    pid = parents[pid]
                c.parents = ':'.join(utils.to36(i) for i in path)
                c._commit()

        return cls.add_comments(tree, comments)

    @classmethod
    @tdb_cassandra.will_write
    def add_comments(cls, tree, comments):
        """Add `comments` to the stored tree for `tree.link`.

        For each comment, the counter of the comment's own column and of every
        ancestor's column is incremented by one, so each counter keeps
        tracking the size of the subtree rooted at its comment.
        """
        CommentTreeStorageBase.add_comments(tree, comments)
        g.log.debug('building updates dict')
        updates = {}
        for c in comments:
            pids = c.parent_path()
            pids.append(c._id)
            # consecutive pairs along the path are (parent, child) edges
            for d, (pid, cid) in enumerate(zip(pids, pids[1:])):
                k = (d, pid, cid)
                updates[k] = updates.get(k, 0) + 1

        g.log.debug('writing %d updates to %s', len(updates),
                    cls._key(tree.link))
        # increment counters in slices of COLUMN_WRITE_BATCH_SIZE
        cols = updates.keys()
        for i in xrange(0, len(updates), cls.COLUMN_WRITE_BATCH_SIZE):
            g.log.debug('adding updates %d..%d', i,
                        i + cls.COLUMN_WRITE_BATCH_SIZE)
            update_batch = {
                c: updates[c]
                for c in cols[i:i + cls.COLUMN_WRITE_BATCH_SIZE]
            }
            with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
                m.insert(cls._cf, cls._key(tree.link), update_batch)
        g.log.debug('added %d comments with %d updates', len(comments),
                    len(updates))

    @classmethod
    @tdb_cassandra.will_write
    def delete_comment(cls, tree, comment):
        """Remove `comment` from the stored tree for `tree.link`.

        Decrements by one the counter of the comment's own column and of every
        ancestor's column.
        """
        CommentTreeStorageBase.delete_comment(tree, comment)
        pids = comment.parent_path()
        pids.append(comment._id)
        updates = {}
        for d, (pid, cid) in enumerate(zip(pids, pids[1:])):
            updates[(d, pid, cid)] = -1
        with batch.Mutator(g.cassandra_pools[cls._connection_pool]) as m:
            m.insert(cls._cf, cls._key(tree.link), updates)

    @classmethod
    @tdb_cassandra.will_write
    def upgrade(cls, tree, link):
        """Write an existing in-memory `tree` into this storage.

        Loads every comment referenced by `tree.tree`, recomputes each
        comment's `parents` path from the tree structure (committing the
        comments whose path changed), then adds all comments.
        """
        cids = []
        for children in tree.tree.itervalues():
            cids.extend(children)

        comments = {}
        for i in xrange(0, len(cids), 100):
            g.log.debug('  loading comments %d..%d', i, i + 100)
            comments.update(Comment._byID(cids[i:i + 100], data=True))

        # need to fill in parents attr for each comment
        modified = []
        # Walk down from the root (None). A comment's path is assigned while
        # its parent is being processed, so it is already set by the time the
        # comment itself is popped.
        stack = [None]
        while stack:
            pid = stack.pop()
            if pid is None:
                parents = ''
            else:
                parents = comments[pid].parents + ':' + comments[pid]._id36
            children = tree.tree.get(pid, [])
            stack.extend(children)
            for cid in children:
                if comments[cid].parents != parents:
                    comments[cid].parents = parents
                    modified.append(comments[cid])

        for comment in modified:
            comment._commit()

        cls.add_comments(tree, comments.values())
Exemplo n.º 2
0
# Replication strategy names, kept as plain strings.
NETWORK_TOPOLOGY_STRATEGY = 'NetworkTopologyStrategy'
""" Replication strategy that puts a number of replicas in each datacenter """

OLD_NETWORK_TOPOLOGY_STRATEGY = 'OldNetworkTopologyStrategy'
"""
Original replication strategy for putting a number of replicas in each datacenter.
This was originally called 'RackAwareStrategy'.
"""

# Alias for the KEYS member of IndexType.
KEYS_INDEX = IndexType.KEYS
""" A secondary index type where each indexed value receives its own row """

# One pre-constructed instance of each class in `types`, bound to a
# module-level name.
# NOTE(review): presumably meant to be passed as comparator / validation
# class arguments when defining schema -- confirm against the callers.
BYTES_TYPE = types.BytesType()
LONG_TYPE = types.LongType()
INT_TYPE = types.IntegerType()
ASCII_TYPE = types.AsciiType()
UTF8_TYPE = types.UTF8Type()
TIME_UUID_TYPE = types.TimeUUIDType()
LEXICAL_UUID_TYPE = types.LexicalUUIDType()
COUNTER_COLUMN_TYPE = types.CounterColumnType()
DOUBLE_TYPE = types.DoubleType()
FLOAT_TYPE = types.FloatType()
DECIMAL_TYPE = types.DecimalType()
BOOLEAN_TYPE = types.BooleanType()
DATE_TYPE = types.DateType()

class SystemManager(object):
    """
    Lets you examine and modify schema definitions as well as get basic
    information about the cluster.
Exemplo n.º 3
0
    def syncdb(keyspace=None):
        """
        Create Cassandra keyspace, CF, SCF

        Ensures the keyspace exists with the configured replication factor,
        then creates each column family (and its secondary indexes) that is
        not already present. Column families that already exist are left
        untouched, including their indexes.

        keyspace -- name of the keyspace to sync; when empty, the
                    "cassandra_keyspace" flag is used, falling back to
                    "synaps_test".
        """
        if not keyspace:
            keyspace = FLAGS.get("cassandra_keyspace", "synaps_test")
        serverlist = FLAGS.get("cassandra_server_list")
        replication_factor = FLAGS.get("cassandra_replication_factor")
        # schema changes are issued through the first configured server
        manager = pycassa.SystemManager(server=serverlist[0])
        strategy_options = {'replication_factor': replication_factor}

        def create_indexes(column_family, indexes):
            # Create one secondary index per (column, value type class) pair,
            # in the order given, with a fresh type instance for each.
            for column, type_cls in indexes:
                manager.create_index(keyspace=keyspace,
                                     column_family=column_family,
                                     column=column,
                                     value_type=type_cls())

        # create keyspace
        # Interpolate after the translation lookup so the message id stays
        # constant.
        LOG.info(_("cassandra syncdb is started for keyspace(%s)") % keyspace)
        if keyspace not in manager.list_keyspaces():
            LOG.info(_("cassandra keyspace %s does not exist.") % keyspace)
            manager.create_keyspace(keyspace,
                                    strategy_options=strategy_options)
            LOG.info(_("cassandra keyspace %s is created.") % keyspace)
        else:
            properties = manager.get_keyspace_properties(keyspace)

            # check strategy_option
            # NOTE(review): if the server reports option values as strings
            # while the flag is an int, this compares unequal on every run --
            # confirm the types on both sides.
            if strategy_options != properties.get('strategy_options'):
                manager.alter_keyspace(keyspace,
                                       strategy_options=strategy_options)
                LOG.info(
                    _("cassandra keyspace strategy options is updated - %s") %
                    str(strategy_options))

        # create CF, SCF
        column_families = manager.get_keyspace_column_families(keyspace)

        if 'Metric' not in column_families:
            manager.create_column_family(
                keyspace=keyspace,
                name='Metric',
                comparator_type=pycassa.ASCII_TYPE,
                key_validation_class=pycassa.LEXICAL_UUID_TYPE,
                column_validation_classes={
                    'project_id': pycassa.UTF8_TYPE,
                    'name': pycassa.UTF8_TYPE,
                    'namespace': pycassa.UTF8_TYPE,
                    'unit': pycassa.UTF8_TYPE,
                    'dimensions': pycassa.UTF8_TYPE,
                    'updated_timestamp': pycassa.DATE_TYPE,
                    'created_timestamp': pycassa.DATE_TYPE
                })
            create_indexes('Metric', [
                ('project_id', types.UTF8Type),
                ('name', types.UTF8Type),
                ('namespace', types.UTF8Type),
                ('dimensions', types.UTF8Type),
                ('updated_timestamp', types.DateType),
                ('created_timestamp', types.DateType),
            ])

        if 'StatArchive' not in column_families:
            # the only super column family
            manager.create_column_family(
                keyspace=keyspace,
                name='StatArchive',
                super=True,
                key_validation_class=pycassa.LEXICAL_UUID_TYPE,
                comparator_type=pycassa.ASCII_TYPE,
                subcomparator_type=pycassa.DATE_TYPE,
                default_validation_class=pycassa.DOUBLE_TYPE)

        if 'MetricAlarm' not in column_families:
            manager.create_column_family(
                keyspace=keyspace,
                name='MetricAlarm',
                key_validation_class=pycassa.LEXICAL_UUID_TYPE,
                comparator_type=pycassa.ASCII_TYPE,
                column_validation_classes={
                    'metric_key': pycassa.LEXICAL_UUID_TYPE,
                    'project_id': pycassa.UTF8_TYPE,
                    'actions_enabled': pycassa.BOOLEAN_TYPE,
                    'alarm_actions': pycassa.UTF8_TYPE,
                    'alarm_arn': pycassa.UTF8_TYPE,
                    'alarm_configuration_updated_timestamp': pycassa.DATE_TYPE,
                    'alarm_description': pycassa.UTF8_TYPE,
                    'alarm_name': pycassa.UTF8_TYPE,
                    'comparison_operator': pycassa.UTF8_TYPE,
                    'dimensions': pycassa.UTF8_TYPE,
                    'evaluation_periods': pycassa.INT_TYPE,
                    'insufficient_data_actions': pycassa.UTF8_TYPE,
                    'metric_name': pycassa.UTF8_TYPE,
                    'namespace': pycassa.UTF8_TYPE,
                    'ok_actions': pycassa.UTF8_TYPE,
                    'period': pycassa.INT_TYPE,
                    'state_reason': pycassa.UTF8_TYPE,
                    'state_reason_data': pycassa.UTF8_TYPE,
                    'state_updated_timestamp': pycassa.DATE_TYPE,
                    'state_value': pycassa.UTF8_TYPE,
                    'statistic': pycassa.UTF8_TYPE,
                    'threshold': pycassa.DOUBLE_TYPE,
                    'unit': pycassa.UTF8_TYPE
                })
            create_indexes('MetricAlarm', [
                ('project_id', types.UTF8Type),
                ('metric_key', types.LexicalUUIDType),
                ('alarm_name', types.UTF8Type),
                ('state_updated_timestamp', types.DateType),
                ('alarm_configuration_updated_timestamp', types.DateType),
                ('state_value', types.UTF8Type),
                ('period', types.IntegerType),
                ('statistic', types.UTF8Type),
            ])

        if 'AlarmHistory' not in column_families:
            manager.create_column_family(
                keyspace=keyspace,
                name='AlarmHistory',
                key_validation_class=pycassa.LEXICAL_UUID_TYPE,
                comparator_type=pycassa.ASCII_TYPE,
                column_validation_classes={
                    'project_id': pycassa.UTF8_TYPE,
                    'alarm_key': pycassa.LEXICAL_UUID_TYPE,
                    'alarm_name': pycassa.UTF8_TYPE,
                    'history_data': pycassa.UTF8_TYPE,
                    'history_item_type': pycassa.UTF8_TYPE,
                    'history_summary': pycassa.UTF8_TYPE,
                    'timestamp': pycassa.DATE_TYPE,
                })
            create_indexes('AlarmHistory', [
                ('project_id', types.UTF8Type),
                ('alarm_key', types.LexicalUUIDType),
                ('alarm_name', types.UTF8Type),
                ('history_item_type', types.UTF8Type),
                ('timestamp', types.DateType),
            ])

        if 'AlarmCounter' not in column_families:
            manager.create_column_family(
                keyspace=keyspace,
                name='AlarmCounter',
                default_validation_class=pycassa.COUNTER_COLUMN_TYPE,
                key_validation_class=pycassa.UTF8_TYPE)

        if 'NotificationGroup' not in column_families:
            manager.create_column_family(
                keyspace=keyspace,
                name='NotificationGroup',
                key_validation_class=pycassa.UTF8_TYPE,
                comparator_type=pycassa.UTF8_TYPE,
                default_validation_class=pycassa.UTF8_TYPE)

        LOG.info(_("cassandra syncdb has finished"))