def load_schema(self):
    """
    Loads the schema definition for this column family from
    Cassandra and updates comparator and validation classes if
    necessary.
    """
    ksdef = self.pool.execute('get_keyspace_description',
                              use_dict_for_col_metadata=True)
    try:
        self._cfdef = ksdef[self.column_family]
    except KeyError:
        nfe = NotFoundException()
        nfe.why = 'Column family %s not found.' % self.column_family
        raise nfe

    self.super = self._cfdef.column_type == 'Super'
    self._load_comparator_classes()
    self._load_validation_classes()
    self._load_key_class()
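# A minimal usage sketch for load_schema(). Here 'cf' is assumed to be an
# already-constructed ColumnFamily; the method takes no arguments and
# refreshes the cached definition in place, so after the comparator or a
# validation class is altered server-side, automatic packing stays in sync.
cf.load_schema()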
def load_schema(self):
    """
    Loads the schema definition for this column family from
    Cassandra and updates comparator and validation classes if
    necessary.
    """
    try:
        try:
            self._obtain_connection()
            ksdef = self._tlocal.client.get_keyspace_description(
                    use_dict_for_col_metadata=True)
            self._cfdef = ksdef[self.column_family]
        except KeyError:
            nfe = NotFoundException()
            nfe.why = 'Column family %s not found.' % self.column_family
            raise nfe
    finally:
        self._release_connection()

    self.super = self._cfdef.column_type == 'Super'
    self._load_comparator_classes()
    self._load_validation_classes()
    self._load_key_class()
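# An illustrative sketch of the connection helpers the try/finally above
# relies on. The bodies here are assumptions based on how the pool is used
# elsewhere in this module, not the library's exact implementation: a client
# is checked out into thread-local storage and always returned, even when
# the schema lookup raises.
def _obtain_connection(self):
    # Check a client out of the pool for the current thread.
    self._tlocal.client = self.pool.get()

def _release_connection(self):
    # Hand the client back so other threads can reuse the connection.
    if self._tlocal.client is not None:
        self._tlocal.client.return_to_pool()
        self._tlocal.client = None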
def get(self, key, columns=None, column_start="", column_finish="",
        column_reversed=False, column_count=100, include_timestamp=False,
        super_column=None, read_consistency_level=None, include_ttl=False):
    """
    Fetches all or part of the row with key `key`.

    The columns fetched may be limited to a specified list of column names
    using `columns`.

    Alternatively, you may fetch a slice of columns or super columns from a
    row using `column_start`, `column_finish`, and `column_count`. Setting
    these will cause columns or super columns to be fetched starting with
    `column_start`, continuing until `column_count` columns or super columns
    have been fetched or `column_finish` is reached. If `column_start` is
    left as the empty string, the slice will begin with the start of the
    row; leaving `column_finish` blank will cause the slice to extend to the
    end of the row. Note that `column_count` defaults to 100, so rows over
    this size will not be completely fetched by default.

    If `column_reversed` is ``True``, columns are fetched in reverse sorted
    order, beginning with `column_start`. In this case, if `column_start` is
    the empty string, the slice will begin with the end of the row.

    You may fetch all or part of only a single super column by setting
    `super_column`. If this is set, `column_start`, `column_finish`,
    `column_count`, and `column_reversed` will apply to the subcolumns of
    `super_column`.

    To include every column's timestamp in the result set, set
    `include_timestamp` to ``True``. Results will include a
    ``(value, timestamp)`` tuple for each column.

    To include every column's ttl in the result set, set `include_ttl` to
    ``True``. Results will include a ``(value, ttl)`` tuple for each column.

    If this is a standard column family, the return type is of the form
    ``{column_name: column_value}``. If this is a super column family and
    `super_column` is not specified, the results are of the form
    ``{super_column_name: {column_name: column_value}}``. If `super_column`
    is set, the super column name will be excluded and the results are of
    the form ``{column_name: column_value}``.
    """
    packed_key = self._pack_key(key)
    single_column = columns is not None and len(columns) == 1
    if (not self.super and single_column) or \
       (self.super and super_column is not None and single_column):
        column = None
        if self.super and super_column is None:
            super_column = columns[0]
        else:
            column = columns[0]
        cp = self._column_path(super_column, column)
        col_or_super = self.pool.execute('get', packed_key, cp,
                read_consistency_level or self.read_consistency_level)
        return self._cosc_to_dict([col_or_super], include_timestamp, include_ttl)
    else:
        cp = self._column_parent(super_column)
        sp = self._slice_predicate(columns, column_start, column_finish,
                                   column_reversed, column_count, super_column)

        list_col_or_super = self.pool.execute('get_slice', packed_key, cp, sp,
                read_consistency_level or self.read_consistency_level)

        if len(list_col_or_super) == 0:
            raise NotFoundException()
        return self._cosc_to_dict(list_col_or_super, include_timestamp,
                                  include_ttl)
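# A usage sketch for the slice parameters above, assuming a standard column
# family 'cf' whose comparator sorts string column names; the key and column
# names are hypothetical.
row = cf.get('sensor42', column_start='2011-01', column_finish='2011-06',
             column_count=50)
# -> {'2011-01': '21.4', ...} in comparator order

# The newest columns instead: reverse the slice, starting from the row end.
latest = cf.get('sensor42', column_reversed=True, column_count=10,
                include_timestamp=True)
# -> {column_name: (value, timestamp)}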
def __init__(self, client, column_family, buffer_size=1024,
             read_consistency_level=ConsistencyLevel.ONE,
             write_consistency_level=ConsistencyLevel.ONE,
             timestamp=gm_timestamp, super=False,
             dict_class=dict, autopack_names=True,
             autopack_values=True):
    """
    Constructs an abstraction of a Cassandra column family or super column
    family. Operations on this, such as `get` or `insert` will get data from
    or insert data into the corresponding Cassandra column family.

    :Parameters:
        `client`: :class:`cassandra.Cassandra.Client`
            Cassandra client with thrift API
        `column_family`: string
            The name of this ColumnFamily
        `buffer_size`: integer
            When calling `get_range`, the intermediate results need to be
            buffered if we are fetching many rows, otherwise the Cassandra
            server will overallocate memory and fail. This is the size of
            that buffer in number of rows.
        `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel`
            Affects the guaranteed replication factor before returning from
            any read operation
        `write_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel`
            Affects the guaranteed replication factor before returning from
            any write operation
        `timestamp`: function
            The default timestamp function returns
            ``int(time.mktime(time.gmtime()))``, the number of seconds since
            the Unix epoch in GMT. Set `timestamp` to replace the default
            timestamp function with your own.
        `super`: bool
            Whether this ColumnFamily has SuperColumns
        `dict_class`: class (must act like the dict type)
            The default dict_class is :class:`dict`. If the order of columns
            matters to you, pass your own dictionary class, or Python 2.7's
            new :class:`collections.OrderedDict`. All returned rows and
            subcolumns are instances of this.
        `autopack_names`: bool
            Whether column and supercolumn names should be packed
            automatically based on the comparator and subcomparator for the
            column family. This does not typically work when used with
            :class:`~pycassa.columnfamilymap.ColumnFamilyMap`.
        `autopack_values`: bool
            Whether column values should be packed automatically based on
            the validator_class for a given column. This should probably be
            set to ``False`` when used with a
            :class:`~pycassa.columnfamilymap.ColumnFamilyMap`.
    """
    self.client = client
    self.column_family = column_family
    self.buffer_size = buffer_size
    self.read_consistency_level = read_consistency_level
    self.write_consistency_level = write_consistency_level
    self.timestamp = timestamp
    self.super = super
    self.dict_class = dict_class
    self.autopack_names = autopack_names
    self.autopack_values = autopack_values

    # Determine the ColumnFamily type to allow for auto conversion
    # so that packing/unpacking doesn't need to be done manually
    self.cf_data_type = None
    self.col_name_data_type = None
    self.supercol_name_data_type = None
    self.col_type_dict = dict()

    col_fam = None
    try:
        col_fam = client.get_keyspace_description()[self.column_family]
    except KeyError:
        raise NotFoundException('Column family %s not found.'
                                % self.column_family)

    if col_fam is not None:
        if self.autopack_names:
            if not self.super:
                self.col_name_data_type = col_fam.comparator_type
            else:
                self.col_name_data_type = col_fam.subcomparator_type
                self.supercol_name_data_type = self._extract_type_name(
                        col_fam.comparator_type)
            self.col_name_data_type = self._extract_type_name(
                    self.col_name_data_type)
        if self.autopack_values:
            self.cf_data_type = self._extract_type_name(
                    col_fam.default_validation_class)
            for name, cdef in col_fam.column_metadata.items():
                self.col_type_dict[name] = self._extract_type_name(
                        cdef.validation_class)
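# A construction sketch for this client-based variant. The connection helper
# here is a hypothetical stand-in for however a thrift client is obtained in
# this era of the API, and the keyspace/column family names are made up.
from collections import OrderedDict

client = make_cassandra_client('Keyspace1')    # hypothetical helper returning
                                               # a cassandra.Cassandra.Client
cf = ColumnFamily(client, 'Standard1',
                  dict_class=OrderedDict,      # preserve column order in results
                  autopack_values=False)       # e.g. when used with ColumnFamilyMap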
def get_indexed_slices(self, index_clause, columns=None, column_start="",
                       column_finish="", column_reversed=False,
                       column_count=100, include_timestamp=False,
                       super_column=None, read_consistency_level=None):
    """
    Fetches a list of KeySlices from a Cassandra server based on an index
    clause.

    :Parameters:
        `index_clause`: :class:`~pycassa.cassandra.ttypes.IndexClause`
            Limits the keys that are returned based on expressions that
            compare the value of a column to a given value. At least one of
            the expressions in the IndexClause must be on an indexed column.

            .. seealso:: :meth:`~pycassa.index.create_index_clause()` and
                         :meth:`~pycassa.index.create_index_expression()`

        `columns`: [str]
            Limit the columns or super_columns fetched to the specified list
        `column_start`: str
            Only fetch when a column or super_column is >= column_start
        `column_finish`: str
            Only fetch when a column or super_column is <= column_finish
        `column_reversed`: bool
            Fetch the columns or super_columns in reverse order. This will
            do nothing unless you passed a dict_class to the constructor.
        `column_count`: int
            Limit the number of columns or super_columns fetched per key
        `include_timestamp`: bool
            If true, return a (value, timestamp) tuple for each column
        `super_column`: str
            Return columns only in this super_column
        `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel`
            Affects the guaranteed replication factor before returning from
            any read operation

    :Returns:
        if include_timestamp == True: {key: {column: (value, timestamp)}}
        else: {key: {column: value}}
    """
    (super_column, column_start, column_finish) = \
            self._pack_slice_cols(super_column, column_start, column_finish)

    packed_cols = None
    if columns is not None:
        packed_cols = []
        for col in columns:
            packed_cols.append(self._pack_name(col, is_supercol_name=self.super))

    cp = ColumnParent(column_family=self.column_family,
                      super_column=super_column)
    sp = create_SlicePredicate(packed_cols, column_start, column_finish,
                               column_reversed, column_count)

    # Pack the values in the index clause expressions
    new_exprs = []
    for expr in index_clause.expressions:
        new_exprs.append(IndexExpression(self._pack_name(expr.column_name),
                                         expr.op,
                                         self._pack_value(expr.value,
                                                          expr.column_name)))
    index_clause.expressions = new_exprs

    keyslice_list = self.client.get_indexed_slices(
            cp, index_clause, sp, self._rcl(read_consistency_level))

    if len(keyslice_list) == 0:
        raise NotFoundException()
    return self._convert_KeySlice_list_to_dict_class(
            keyslice_list, include_timestamp)
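# A sketch of building an index clause for get_indexed_slices(), using the
# helpers the docstring above points to. The column name 'state', its value
# 'UT', and the assumption that 'state' carries a secondary index are all
# hypothetical.
from pycassa.index import create_index_clause, create_index_expression

expr = create_index_expression('state', 'UT')   # defaults to the EQ operator
clause = create_index_clause([expr], count=100)
rows = cf.get_indexed_slices(clause)
# -> {key: {column: value}} for each matching row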
def get(self, key, columns=None, column_start="", column_finish="",
        column_reversed=False, column_count=100, include_timestamp=False,
        super_column=None, read_consistency_level=None):
    """
    Fetch a key from a Cassandra server

    :Parameters:
        `key`: str
            The key to fetch
        `columns`: [str]
            Limit the columns or super_columns fetched to the specified list
        `column_start`: str
            Only fetch when a column or super_column is >= column_start
        `column_finish`: str
            Only fetch when a column or super_column is <= column_finish
        `column_reversed`: bool
            Fetch the columns or super_columns in reverse order. This will
            do nothing unless you passed a ``dict_class`` to the constructor.
        `column_count`: int
            Limit the number of columns or super_columns fetched per key
        `include_timestamp`: bool
            If true, return a (value, timestamp) tuple for each column
        `super_column`: str
            Return columns only in this super_column
        `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel`
            Affects the guaranteed replication factor before returning from
            any read operation

    :Returns:
        if include_timestamp == True: {'column': ('value', timestamp)}
        else: {'column': 'value'}
    """
    super_column, column_start, column_finish = self._pack_slice_cols(
            super_column, column_start, column_finish)

    packed_cols = None
    if columns is not None:
        packed_cols = []
        for col in columns:
            packed_cols.append(self._pack_name(col, is_supercol_name=self.super))

    cp = ColumnParent(column_family=self.column_family,
                      super_column=super_column)
    sp = create_SlicePredicate(packed_cols, column_start, column_finish,
                               column_reversed, column_count)

    list_col_or_super = self.client.get_slice(
            key, cp, sp, self._rcl(read_consistency_level))

    if len(list_col_or_super) == 0:
        raise NotFoundException()
    return self._convert_ColumnOrSuperColumns_to_dict_class(
            list_col_or_super, include_timestamp)
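# A sketch of the two return shapes documented above; the key, column, and
# timestamp values are hypothetical.
cf.get('row1', columns=['name'])
# -> {'name': 'Alice'}
cf.get('row1', columns=['name'], include_timestamp=True)
# -> {'name': ('Alice', 1288734000)}   # (value, timestamp) tuple per column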
def __init__(self, pool, column_family, buffer_size=1024,
             read_consistency_level=ConsistencyLevel.ONE,
             write_consistency_level=ConsistencyLevel.ONE,
             timestamp=gm_timestamp, super=False,
             dict_class=util.OrderedDict, autopack_names=True,
             autopack_values=True):
    """
    An abstraction of a Cassandra column family or super column family.
    Operations on this, such as :meth:`get` or :meth:`insert` will get data
    from or insert data into the corresponding Cassandra column family with
    name `column_family`.

    `pool` is a :class:`~pycassa.pool.ConnectionPool` that the column family
    will use for all operations. A connection is drawn from the pool before
    each operation and is returned afterward. Note that the keyspace to be
    used is determined by the pool.

    When calling :meth:`get_range()` or :meth:`get_indexed_slices()`, the
    intermediate results need to be buffered if we are fetching many rows,
    otherwise the Cassandra server will overallocate memory and fail.
    `buffer_size` is the size of that buffer in number of rows. The default
    is 1024.

    `read_consistency_level` and `write_consistency_level` set the default
    consistency levels for every operation; these may be overridden
    per-operation. These should be instances of
    :class:`~pycassa.cassandra.ttypes.ConsistencyLevel`. They default to
    level ``ONE``.

    Each :meth:`insert()` or :meth:`remove()` sends a timestamp with every
    column. The `timestamp` parameter is a function that is used to get this
    timestamp when needed. The default function is :meth:`gm_timestamp()`.

    Results are returned as dictionaries. :class:`~pycassa.util.OrderedDict`
    is used by default so that order is maintained. A different class, such
    as :class:`dict`, may be used instead by passing `dict_class`.

    By default, column family definitions will be examined to determine what
    data type Cassandra expects for column names and values. When columns
    are retrieved or inserted, their names and values will be packed or
    unpacked if necessary to convert them to or from their binary
    representation. Automatic packing of names and values can be
    individually enabled or disabled with `autopack_names` and
    `autopack_values`. When using
    :class:`~pycassa.columnfamilymap.ColumnFamilyMap`, these should both be
    set to ``False``.
    """
    self.pool = pool
    self._tlocal = threading.local()
    self._tlocal.client = None
    self.column_family = column_family
    self.buffer_size = buffer_size
    self.read_consistency_level = read_consistency_level
    self.write_consistency_level = write_consistency_level
    self.timestamp = timestamp
    self.dict_class = dict_class
    self.autopack_names = autopack_names
    self.autopack_values = autopack_values

    # Determine the ColumnFamily type to allow for auto conversion
    # so that packing/unpacking doesn't need to be done manually
    self.cf_data_type = None
    self.col_name_data_type = None
    self.supercol_name_data_type = None
    self.col_type_dict = dict()

    col_fam = None
    try:
        try:
            self._obtain_connection()
            col_fam = self._tlocal.client.get_keyspace_description(
                    use_dict_for_col_metadata=True)[self.column_family]
        except KeyError:
            nfe = NotFoundException()
            nfe.why = 'Column family %s not found.' % self.column_family
            raise nfe
    finally:
        self._release_connection()

    if col_fam is not None:
        self.super = col_fam.column_type == 'Super'
        if self.autopack_names:
            if not self.super:
                self.col_name_data_type = col_fam.comparator_type
            else:
                self.col_name_data_type = col_fam.subcomparator_type
                self.supercol_name_data_type = util.extract_type_name(
                        col_fam.comparator_type)
            self.col_name_data_type = util.extract_type_name(
                    self.col_name_data_type)
        if self.autopack_values:
            self.cf_data_type = util.extract_type_name(
                    col_fam.default_validation_class)
            for name, cdef in col_fam.column_metadata.items():
                self.col_type_dict[name] = util.extract_type_name(
                        cdef.validation_class)
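# A construction sketch for this pool-based variant; the server address and
# the keyspace/column family names are hypothetical.
from pycassa.pool import ConnectionPool

pool = ConnectionPool('Keyspace1', server_list=['localhost:9160'])
users = ColumnFamily(pool, 'Users')    # schema is read from the cluster here,
                                       # so 'super' no longer needs to be passed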
def __init__(self, pool, column_family, buffer_size=1024,
             read_consistency_level=ConsistencyLevel.ONE,
             write_consistency_level=ConsistencyLevel.ONE,
             timestamp=gm_timestamp, super=False,
             dict_class=OrderedDict, autopack_names=True,
             autopack_values=True):
    """
    Constructs an abstraction of a Cassandra column family or super column
    family.

    Operations on this, such as :meth:`get` or :meth:`insert` will get data
    from or insert data into the corresponding Cassandra column family.

    :param pool: A connection pool to a Cassandra cluster
    :type pool: :class:`~pycassa.pool.AbstractPool`

    :param column_family: The name of the column family
    :type column_family: string

    :param buffer_size: When calling :meth:`get_range()` or
      :meth:`get_indexed_slices()`, the intermediate results need to be
      buffered if we are fetching many rows, otherwise the Cassandra server
      will overallocate memory and fail. This is the size of that buffer in
      number of rows.
    :type buffer_size: int

    :param read_consistency_level: Affects the guaranteed replication factor
      before returning from any read operation
    :type read_consistency_level: :class:`~pycassa.cassandra.ttypes.ConsistencyLevel`

    :param write_consistency_level: Affects the guaranteed replication
      factor before returning from any write operation
    :type write_consistency_level: :class:`~pycassa.cassandra.ttypes.ConsistencyLevel`

    :param timestamp: The default timestamp function returns
      ``int(time.mktime(time.gmtime()))``, the number of seconds since the
      Unix epoch in GMT. Set this to replace the default timestamp function
      with your own.
    :type timestamp: function

    :param dict_class: The default dict_class is
      :class:`~pycassa.util.OrderedDict`. All returned rows and subcolumns
      are instances of this.
    :type dict_class: class

    :param autopack_names: Whether column and supercolumn names should be
      packed automatically based on the comparator and subcomparator for the
      column family. This does not typically work when used with
      :class:`~pycassa.columnfamilymap.ColumnFamilyMap`.
    :type autopack_names: bool

    :param autopack_values: Whether column values should be packed
      automatically based on the validator_class for a given column. This
      should probably be set to ``False`` when used with a
      :class:`~pycassa.columnfamilymap.ColumnFamilyMap`.
    :type autopack_values: bool

    :param super: Whether this column family has super columns. This is
      detected automatically since 0.5.1.

      .. deprecated:: 0.5.1
    :type super: bool
    """
    self.pool = pool
    self.client = None
    self.column_family = column_family
    self.buffer_size = buffer_size
    self.read_consistency_level = read_consistency_level
    self.write_consistency_level = write_consistency_level
    self.timestamp = timestamp
    self.dict_class = dict_class
    self.autopack_names = autopack_names
    self.autopack_values = autopack_values

    # Determine the ColumnFamily type to allow for auto conversion
    # so that packing/unpacking doesn't need to be done manually
    self.cf_data_type = None
    self.col_name_data_type = None
    self.supercol_name_data_type = None
    self.col_type_dict = dict()

    col_fam = None
    try:
        self.client = self.pool.get()
        col_fam = self.client.get_keyspace_description(
                use_dict_for_col_metadata=True)[self.column_family]
    except KeyError:
        nfe = NotFoundException()
        nfe.why = 'Column family %s not found.' % self.column_family
        raise nfe
    finally:
        # Guard against pool.get() itself failing, in which case
        # self.client is still None.
        if self.client is not None:
            self.client.return_to_pool()

    if col_fam is not None:
        self.super = col_fam.column_type == 'Super'
        if self.autopack_names:
            if not self.super:
                self.col_name_data_type = col_fam.comparator_type
            else:
                self.col_name_data_type = col_fam.subcomparator_type
                self.supercol_name_data_type = self._extract_type_name(
                        col_fam.comparator_type)
            self.col_name_data_type = self._extract_type_name(
                    self.col_name_data_type)
        if self.autopack_values:
            self.cf_data_type = self._extract_type_name(
                    col_fam.default_validation_class)
            for name, cdef in col_fam.column_metadata.items():
                self.col_type_dict[name] = self._extract_type_name(
                        cdef.validation_class)
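# A sketch of the pool checkout contract the constructor above relies on:
# get() checks a wrapped client out of the pool and return_to_pool() hands
# it back. Names follow the code above; the surrounding usage is illustrative.
client = pool.get()
try:
    ksdef = client.get_keyspace_description(use_dict_for_col_metadata=True)
finally:
    client.return_to_pool()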