Exemplo n.º 1
0
    def load_schema(self):
        """
        Fetch the schema definition for this column family from Cassandra
        and refresh the comparator and validation classes if they changed.

        Raises NotFoundException when the column family does not exist in
        the keyspace.
        """
        keyspace_def = self.pool.execute("get_keyspace_description",
                                         use_dict_for_col_metadata=True)
        try:
            self._cfdef = keyspace_def[self.column_family]
        except KeyError:
            not_found = NotFoundException()
            not_found.why = "Column family %s not found." % self.column_family
            raise not_found

        # A 'Super' column type marks a super column family.
        self.super = (self._cfdef.column_type == "Super")
        self._load_comparator_classes()
        self._load_validation_classes()
        self._load_key_class()
Exemplo n.º 2
0
    def load_schema(self):
        """
        Query Cassandra for this column family's definition and bring the
        comparator and validation classes up to date when necessary.

        Raises NotFoundException when the column family is absent from the
        keyspace description.
        """
        description = self.pool.execute('get_keyspace_description',
                                        use_dict_for_col_metadata=True)
        cfdef = description.get(self.column_family)
        if cfdef is None:
            missing = NotFoundException()
            missing.why = 'Column family %s not found.' % self.column_family
            raise missing
        self._cfdef = cfdef

        self.super = self._cfdef.column_type == 'Super'
        self._load_comparator_classes()
        self._load_validation_classes()
        self._load_key_class()
Exemplo n.º 3
0
    def load_schema(self):
        """
        Retrieve this column family's schema from Cassandra over a
        thread-local connection and update the comparator and validation
        classes when necessary.

        Raises NotFoundException when the column family does not exist.
        """
        try:
            try:
                # The connection is obtained inside the try so that any
                # failure still releases it in the finally clause below.
                self._obtain_connection()
                description = self._tlocal.client.get_keyspace_description(
                    use_dict_for_col_metadata=True)
                self._cfdef = description[self.column_family]
            except KeyError:
                err = NotFoundException()
                err.why = 'Column family %s not found.' % self.column_family
                raise err
        finally:
            self._release_connection()

        self.super = self._cfdef.column_type == 'Super'
        self._load_comparator_classes()
        self._load_validation_classes()
        self._load_key_class()
Exemplo n.º 4
0
    def load_schema(self):
        """
        Reload the column family definition from the server, refreshing the
        comparator and validation classes if they have changed.

        Raises NotFoundException when the keyspace has no column family
        with this name.
        """
        try:
            try:
                self._obtain_connection()
                client = self._tlocal.client
                ks_description = client.get_keyspace_description(
                    use_dict_for_col_metadata=True)
                self._cfdef = ks_description[self.column_family]
            except KeyError:
                exc = NotFoundException()
                exc.why = 'Column family %s not found.' % self.column_family
                raise exc
        finally:
            # Always return the connection, even on failure.
            self._release_connection()

        self.super = (self._cfdef.column_type == 'Super')
        self._load_comparator_classes()
        self._load_validation_classes()
        self._load_key_class()
Exemplo n.º 5
0
    def get(self, key, columns=None, column_start="", column_finish="",
            column_reversed=False, column_count=100, include_timestamp=False,
            super_column=None, read_consistency_level=None, include_ttl=False):
        """
        Fetch all or part of the row stored under `key`.

        `columns` limits the result to the named columns (or super columns).
        Alternatively, `column_start`, `column_finish`, `column_count`, and
        `column_reversed` select a slice of (sub)columns: the slice begins at
        `column_start` (or the start of the row when empty — the end of the
        row when `column_reversed` is ``True``) and stops after
        `column_count` entries or at `column_finish`.  `column_count`
        defaults to 100, so larger rows are not fully fetched by default.

        When `super_column` is given, only that super column is read and the
        slice parameters apply to its subcolumns.

        `include_timestamp` and `include_ttl` cause each column value to be
        returned as a ``(value, timestamp)`` or ``(value, ttl)`` tuple.

        Returns ``{column_name: column_value}`` for standard column families
        and for super column families when `super_column` is given;
        otherwise ``{super_column_name: {column_name: column_value}}``.
        Raises NotFoundException when nothing matches.
        """
        packed_key = self._pack_key(key)

        # A single requested (sub)column can be served by the cheaper 'get'
        # call instead of a full 'get_slice'.
        is_single = columns is not None and len(columns) == 1
        single_standard = not self.super and is_single
        single_sub = self.super and super_column is not None and is_single

        if single_standard or single_sub:
            column = None
            if self.super and super_column is None:
                super_column = columns[0]
            else:
                column = columns[0]
            path = self._column_path(super_column, column)
            result = self.pool.execute(
                'get', packed_key, path,
                read_consistency_level or self.read_consistency_level)
            return self._cosc_to_dict([result], include_timestamp, include_ttl)

        parent = self._column_parent(super_column)
        predicate = self._slice_predicate(columns, column_start, column_finish,
                                          column_reversed, column_count,
                                          super_column)
        cosc_list = self.pool.execute(
            'get_slice', packed_key, parent, predicate,
            read_consistency_level or self.read_consistency_level)

        if not cosc_list:
            raise NotFoundException()
        return self._cosc_to_dict(cosc_list, include_timestamp, include_ttl)
Exemplo n.º 6
0
    def __init__(self,
                 client,
                 column_family,
                 buffer_size=1024,
                 read_consistency_level=ConsistencyLevel.ONE,
                 write_consistency_level=ConsistencyLevel.ONE,
                 timestamp=gm_timestamp,
                 super=False,
                 dict_class=dict,
                 autopack_names=True,
                 autopack_values=True):
        """
        Constructs an abstraction of a Cassandra column family or super column family.

        Operations on this, such as `get` or `insert` will get data from or
        insert data into the corresponding Cassandra column family.

        :Parameters:
            `client`: :class:`cassandra.Cassandra.Client`
                Cassandra client with thrift API
            `column_family`: string
                The name of this ColumnFamily
            `buffer_size`: integer
                When calling `get_range`, the intermediate results need to be
                buffered if we are fetching many rows, otherwise the Cassandra
                server will overallocate memory and fail.  This is the size of
                that buffer.
            `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel`
                Affects the guaranteed replication factor before returning from
                any read operation
            `write_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel`
                Affects the guaranteed replication factor before returning from
                any write operation
            `timestamp`: function
                The default timestamp function returns:
                int(time.mktime(time.gmtime()))
                Or the number of seconds since Unix epoch in GMT.
                Set timestamp to replace the default timestamp function with your
                own.
            `super`: bool
                Whether this ColumnFamily has SuperColumns
            `dict_class` : class (must act like the dict type)
                The default dict_class is :class:`dict`.
                If the order of columns matter to you, pass your own dictionary
                class, or python 2.7's new :class:`collections.OrderedDict`. All returned
                rows and subcolumns are instances of this.
            `autopack_names`: bool
                Whether column and supercolumn names should be packed automatically
                based on the comparator and subcomparator for the column
                family.  This does not typically work when used with
                :class:`~pycassa.columnfamilymap.ColumnFamilyMap`.
            `autopack_values`: bool
                Whether column values should be packed automatically based on
                the validator_class for a given column.  This should probably
                be set to ``False`` when used with a
                :class:`~pycassa.columnfamilymap.ColumnFamilyMap`.

        :Raises: NotFoundException if `column_family` does not exist in the
            keyspace.
        """

        self.client = client
        self.column_family = column_family
        self.buffer_size = buffer_size
        self.read_consistency_level = read_consistency_level
        self.write_consistency_level = write_consistency_level
        self.timestamp = timestamp
        self.super = super
        self.dict_class = dict_class
        self.autopack_names = autopack_names
        self.autopack_values = autopack_values

        # Determine the ColumnFamily type to allow for auto conversion
        # so that packing/unpacking doesn't need to be done manually
        self.cf_data_type = None
        self.col_name_data_type = None
        self.supercol_name_data_type = None
        self.col_type_dict = dict()

        col_fam = None
        try:
            col_fam = client.get_keyspace_description()[self.column_family]
        except KeyError:
            raise NotFoundException('Column family %s not found.' %
                                    self.column_family)

        if col_fam is not None:
            if self.autopack_names:
                if not self.super:
                    self.col_name_data_type = col_fam.comparator_type
                else:
                    # For super CFs, subcolumn names use the subcomparator
                    # and super column names use the comparator.
                    self.col_name_data_type = col_fam.subcomparator_type
                    self.supercol_name_data_type = self._extract_type_name(
                        col_fam.comparator_type)

                # Normalize the fully-qualified class to a bare type name.
                # (A dead `index = ...` binding in the chained assignment
                # has been removed; the value was never used.)
                self.col_name_data_type = self._extract_type_name(
                    self.col_name_data_type)
            if self.autopack_values:
                self.cf_data_type = self._extract_type_name(
                    col_fam.default_validation_class)
                for name, cdef in col_fam.column_metadata.items():
                    self.col_type_dict[name] = self._extract_type_name(
                        cdef.validation_class)
Exemplo n.º 7
0
    def get_indexed_slices(self,
                           index_clause,
                           columns=None,
                           column_start="",
                           column_finish="",
                           column_reversed=False,
                           column_count=100,
                           include_timestamp=False,
                           super_column=None,
                           read_consistency_level=None):
        """
        Fetch a list of KeySlices from Cassandra based on an index clause.

        :Parameters:
            `index_clause`: :class:`~pycassa.cassandra.ttypes.IndexClause`
                Limits the keys that are returned based on expressions that
                compare a column's value to a given value.  At least one
                expression must be on an indexed column.
                .. seealso:: meth::pycassa.index.create_index_clause() and
                             meth::pycassa.index.create_index_expression().
            `columns`: [str]
                Limit the columns or super_columns fetched to the specified list
            `column_start`: str
                Only fetch when a column or super_column is >= column_start
            `column_finish`: str
                Only fetch when a column or super_column is <= column_finish
            `column_reversed`: bool
                Fetch the columns or super_columns in reverse order. This will
                do nothing unless you passed a dict_class to the constructor.
            `column_count`: int
                Limit the number of columns or super_columns fetched per key
            `include_timestamp`: bool
                If true, return a (value, timestamp) tuple for each column
            `super_column`: str
                Return columns only in this super_column
            `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel`
                Affects the guaranteed replication factor before returning
                from any read operation

        :Returns:
            if include_timestamp == True: {key : {column : (value, timestamp)}}
            else: {key : {column : value}}
        """
        super_column, column_start, column_finish = self._pack_slice_cols(
            super_column, column_start, column_finish)

        packed_cols = None
        if columns is not None:
            packed_cols = [self._pack_name(col, is_supercol_name=self.super)
                           for col in columns]

        parent = ColumnParent(column_family=self.column_family,
                              super_column=super_column)
        predicate = create_SlicePredicate(packed_cols, column_start,
                                          column_finish, column_reversed,
                                          column_count)

        # Pack the values in the index clause expressions.
        # NOTE: the caller's index_clause is mutated in place, matching the
        # existing contract of this method.
        index_clause.expressions = [
            IndexExpression(self._pack_name(expr.column_name), expr.op,
                            self._pack_value(expr.value, expr.column_name))
            for expr in index_clause.expressions]

        keyslice_list = self.client.get_indexed_slices(
            parent, index_clause, predicate,
            self._rcl(read_consistency_level))

        if not keyslice_list:
            raise NotFoundException()
        return self._convert_KeySlice_list_to_dict_class(
            keyslice_list, include_timestamp)
Exemplo n.º 8
0
    def get(self,
            key,
            columns=None,
            column_start="",
            column_finish="",
            column_reversed=False,
            column_count=100,
            include_timestamp=False,
            super_column=None,
            read_consistency_level=None):
        """
        Fetch a key from a Cassandra server.

        :Parameters:
            `key`: str
                The key to fetch
            `columns`: [str]
                Limit the columns or super_columns fetched to the specified list
            `column_start`: str
                Only fetch when a column or super_column is >= column_start
            `column_finish`: str
                Only fetch when a column or super_column is <= column_finish
            `column_reversed`: bool
                Fetch the columns or super_columns in reverse order. This will
                do nothing unless you passed a ``dict_class`` to the constructor.
            `column_count`: int
                Limit the number of columns or super_columns fetched per key
            `include_timestamp` : bool
                If true, return a (value, timestamp) tuple for each column
            `super_column`: str
                Return columns only in this super_column
            `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel`
                Affects the guaranteed replication factor before returning
                from any read operation

        :Returns:
            if include_timestamp == True: {'column': ('value', timestamp)}
            else: {'column': 'value'}
        """
        super_column, column_start, column_finish = self._pack_slice_cols(
            super_column, column_start, column_finish)

        packed_cols = None
        if columns is not None:
            packed_cols = [self._pack_name(col, is_supercol_name=self.super)
                           for col in columns]

        parent = ColumnParent(column_family=self.column_family,
                              super_column=super_column)
        predicate = create_SlicePredicate(packed_cols, column_start,
                                          column_finish, column_reversed,
                                          column_count)

        cosc_list = self.client.get_slice(
            key, parent, predicate, self._rcl(read_consistency_level))

        if not cosc_list:
            raise NotFoundException()
        return self._convert_ColumnOrSuperColumns_to_dict_class(
            cosc_list, include_timestamp)
Exemplo n.º 9
0
    def __init__(self, pool, column_family, buffer_size=1024,
                 read_consistency_level=ConsistencyLevel.ONE,
                 write_consistency_level=ConsistencyLevel.ONE,
                 timestamp=gm_timestamp, super=False,
                 dict_class=util.OrderedDict, autopack_names=True,
                 autopack_values=True):
        """
        An abstraction of a Cassandra column family or super column family.
        Operations on this, such as :meth:`get` or :meth:`insert` will get data from or
        insert data into the corresponding Cassandra column family with
        name `column_family`.

        `pool` is a :class:`~pycassa.pool.ConnectionPool` that the column
        family will use for all operations.  A connection is drawn from the
        pool before each operations and is returned afterwards.  Note that
        the keyspace to be used is determined by the pool.

        When calling :meth:`get_range()` or :meth:`get_indexed_slices()`,
        the intermediate results need to be buffered if we are fetching many
        rows, otherwise the Cassandra server will overallocate memory and fail.
        `buffer_size` is the size of that buffer in number of rows.  The default
        is 1024.

        `read_consistency_level` and `write_consistency_level` set the default
        consistency levels for every operation; these may be overridden
        per-operation. These should be instances of
        :class:`~pycassa.cassandra.ttypes.ConsistencyLevel`.  These default
        to level ``ONE``.

        Each :meth:`insert()` or :meth:`remove` sends a timestamp with every
        column. The `timestamp` parameter is a function that is used to get
        this timestamp when needed.  The default function is :meth:`gm_timestamp()`.

        Results are returned as dictionaries. :class:`~pycassa.util.OrderedDict` is
        used by default so that order is maintained. A different class, such as
        :class:`dict` may be used instead by passing `dict_class`.

        By default, column family definitions will be examined to determine
        what data type Cassandra expects for column names and values. When
        columns are retrieved or inserted, their names and values will be
        packed or unpacked if necessary to convert them to or from their
        binary representation. Automatic packing of names and values can
        be individually enabled or disabled with `autopack_names` and
        `autopack_values`.  When using :class:`~pycassa.columnfamilymap.ColumnFamilyMap`,
        these should both be set to ``False``.

        Raises NotFoundException if `column_family` does not exist.
        """

        self.pool = pool
        # Connections are stored per-thread so concurrent callers do not
        # share a client.
        self._tlocal = threading.local()
        self._tlocal.client = None
        self.column_family = column_family
        self.buffer_size = buffer_size
        self.read_consistency_level = read_consistency_level
        self.write_consistency_level = write_consistency_level
        self.timestamp = timestamp
        self.dict_class = dict_class
        self.autopack_names = autopack_names
        self.autopack_values = autopack_values

        # Determine the ColumnFamily type to allow for auto conversion
        # so that packing/unpacking doesn't need to be done manually
        self.cf_data_type = None
        self.col_name_data_type = None
        self.supercol_name_data_type = None
        self.col_type_dict = dict()

        col_fam = None
        try:
            try:
                self._obtain_connection()
                col_fam = self._tlocal.client.get_keyspace_description(
                    use_dict_for_col_metadata=True)[self.column_family]
            except KeyError:
                nfe = NotFoundException()
                nfe.why = 'Column family %s not found.' % self.column_family
                raise nfe
        finally:
            self._release_connection()

        if col_fam is not None:
            # The 'super' argument is ignored; the CF type is detected here.
            self.super = col_fam.column_type == 'Super'
            if self.autopack_names:
                if not self.super:
                    self.col_name_data_type = col_fam.comparator_type
                else:
                    # Super CFs: subcolumn names use the subcomparator,
                    # super column names use the comparator.
                    self.col_name_data_type = col_fam.subcomparator_type
                    self.supercol_name_data_type = util.extract_type_name(col_fam.comparator_type)

                # Normalize to a bare type name.  (Removed a dead
                # `index = ...` binding that was never read.)
                self.col_name_data_type = util.extract_type_name(self.col_name_data_type)
            if self.autopack_values:
                self.cf_data_type = util.extract_type_name(col_fam.default_validation_class)
                for name, cdef in col_fam.column_metadata.items():
                    self.col_type_dict[name] = util.extract_type_name(cdef.validation_class)
Exemplo n.º 10
0
    def __init__(self, pool, column_family, buffer_size=1024,
                 read_consistency_level=ConsistencyLevel.ONE,
                 write_consistency_level=ConsistencyLevel.ONE,
                 timestamp=gm_timestamp, super=False,
                 dict_class=OrderedDict, autopack_names=True,
                 autopack_values=True):
        """
        Constructs an abstraction of a Cassandra column family or super column family.

        Operations on this, such as :meth:`get` or :meth:`insert` will get data from or
        insert data into the corresponding Cassandra column family.

        :param pool: A connection pool to a Cassandra cluster
        :type client: :class:`~pycassa.pool.AbstractPool`

        :param column_family: The name of the column family
        :type column_family: string

        :param buffer_size: When calling :meth:`get_range()` or
          :meth:`get_indexed_slices()`, the intermediate results need
          to be buffered if we are fetching many rows, otherwise the
          Cassandra server will overallocate memory and fail.  This
          is the size of that buffer in number of rows.
        :type buffer_size: int

        :param read_consistency_level: Affects the guaranteed replication factor
          before returning from any read operation
        :type read_consistency_level: :class:`~pycassa.cassandra.ttypes.ConsistencyLevel`

        :param write_consistency_level: Affects the guaranteed replication
          factor before returning from any write operation
        :type write_consistency_level: :class:`~pycassa.cassandra.ttypes.ConsistencyLevel`

        :param timestamp:
          The default timestamp function returns
          ``int(time.mktime(time.gmtime()))``,
          the number of seconds since Unix epoch in GMT.
          Set this to replace the default timestamp function with your own.
        :type timestamp: function

        :param dict_class: The default dict_class is :class:`~pycassa.util.OrderedDict`.
          All returned rows and subcolumns are instances of this.
        :type dict_class: class

        :param autopack_names: Whether column and supercolumn names should
          be packed automatically based on the comparator and subcomparator
          for the column family.  This does not typically work when used with
          :class:`~pycassa.columnfamilymap.ColumnFamilyMap`.
        :type autopack_names: bool

        :param autopack_values: Whether column values should be packed
          automatically based on the validator_class for a given column.
          This should probably be set to ``False`` when used with a
          :class:`~pycassa.columnfamilymap.ColumnFamilyMap`.
        :type autopack_values: bool

        :param super: Whether this column family has super columns. This
          is detected automatically since 0.5.1.

          .. deprecated:: 0.5.1

        :type super: bool

        :raises NotFoundException: if `column_family` does not exist in the
          keyspace.

        """

        self.pool = pool
        self.client = None
        self.column_family = column_family
        self.buffer_size = buffer_size
        self.read_consistency_level = read_consistency_level
        self.write_consistency_level = write_consistency_level
        self.timestamp = timestamp
        self.dict_class = dict_class
        self.autopack_names = autopack_names
        self.autopack_values = autopack_values

        # Determine the ColumnFamily type to allow for auto conversion
        # so that packing/unpacking doesn't need to be done manually
        self.cf_data_type = None
        self.col_name_data_type = None
        self.supercol_name_data_type = None
        self.col_type_dict = dict()

        col_fam = None
        try:
            self.client = self.pool.get()
            col_fam = self.client.get_keyspace_description(use_dict_for_col_metadata=True)[self.column_family]
        except KeyError:
            nfe = NotFoundException()
            nfe.why = 'Column family %s not found.' % self.column_family
            raise nfe
        finally:
            # BUGFIX: if pool.get() itself raised, self.client is still None
            # and the unconditional return_to_pool() would raise an
            # AttributeError that masks the original error.
            if self.client is not None:
                self.client.return_to_pool()

        if col_fam is not None:
            # The 'super' argument is deprecated; the type is detected here.
            self.super = col_fam.column_type == 'Super'
            if self.autopack_names:
                if not self.super:
                    self.col_name_data_type = col_fam.comparator_type
                else:
                    # Super CFs: subcolumn names use the subcomparator,
                    # super column names use the comparator.
                    self.col_name_data_type = col_fam.subcomparator_type
                    self.supercol_name_data_type = self._extract_type_name(col_fam.comparator_type)

                # Normalize to a bare type name.  (Removed a dead
                # `index = ...` binding that was never read.)
                self.col_name_data_type = self._extract_type_name(self.col_name_data_type)
            if self.autopack_values:
                self.cf_data_type = self._extract_type_name(col_fam.default_validation_class)
                for name, cdef in col_fam.column_metadata.items():
                    self.col_type_dict[name] = self._extract_type_name(cdef.validation_class)
Exemplo n.º 11
0
    def __init__(self,
                 pool,
                 column_family,
                 buffer_size=1024,
                 read_consistency_level=ConsistencyLevel.ONE,
                 write_consistency_level=ConsistencyLevel.ONE,
                 timestamp=gm_timestamp,
                 super=False,
                 dict_class=util.OrderedDict,
                 autopack_names=True,
                 autopack_values=True):
        """
        An abstraction of a Cassandra column family or super column family.
        Operations on this, such as :meth:`get` or :meth:`insert` will get data from or
        insert data into the corresponding Cassandra column family with
        name `column_family`.

        `pool` is a :class:`~pycassa.pool.ConnectionPool` that the column
        family will use for all operations.  A connection is drawn from the
        pool before each operations and is returned afterwards.  Note that
        the keyspace to be used is determined by the pool.

        When calling :meth:`get_range()` or :meth:`get_indexed_slices()`,
        the intermediate results need to be buffered if we are fetching many
        rows, otherwise the Cassandra server will overallocate memory and fail.
        `buffer_size` is the size of that buffer in number of rows.  The default
        is 1024.

        `read_consistency_level` and `write_consistency_level` set the default
        consistency levels for every operation; these may be overridden
        per-operation. These should be instances of
        :class:`~pycassa.cassandra.ttypes.ConsistencyLevel`.  These default
        to level ``ONE``.

        Each :meth:`insert()` or :meth:`remove` sends a timestamp with every
        column. The `timestamp` parameter is a function that is used to get
        this timestamp when needed.  The default function is :meth:`gm_timestamp()`.

        Results are returned as dictionaries. :class:`~pycassa.util.OrderedDict` is
        used by default so that order is maintained. A different class, such as
        :class:`dict` may be used instead by passing `dict_class`.

        By default, column family definitions will be examined to determine
        what data type Cassandra expects for column names and values. When
        columns are retrieved or inserted, their names and values will be
        packed or unpacked if necessary to convert them to or from their
        binary representation. Automatic packing of names and values can
        be individually enabled or disabled with `autopack_names` and
        `autopack_values`.  When using :class:`~pycassa.columnfamilymap.ColumnFamilyMap`,
        these should both be set to ``False``.

        Raises NotFoundException if `column_family` does not exist.
        """

        self.pool = pool
        # Connections are stored per-thread so concurrent callers do not
        # share a client.
        self._tlocal = threading.local()
        self._tlocal.client = None
        self.column_family = column_family
        self.buffer_size = buffer_size
        self.read_consistency_level = read_consistency_level
        self.write_consistency_level = write_consistency_level
        self.timestamp = timestamp
        self.dict_class = dict_class
        self.autopack_names = autopack_names
        self.autopack_values = autopack_values

        # Determine the ColumnFamily type to allow for auto conversion
        # so that packing/unpacking doesn't need to be done manually
        self.cf_data_type = None
        self.col_name_data_type = None
        self.supercol_name_data_type = None
        self.col_type_dict = dict()

        col_fam = None
        try:
            try:
                self._obtain_connection()
                col_fam = self._tlocal.client.get_keyspace_description(
                    use_dict_for_col_metadata=True)[self.column_family]
            except KeyError:
                nfe = NotFoundException()
                nfe.why = 'Column family %s not found.' % self.column_family
                raise nfe
        finally:
            self._release_connection()

        if col_fam is not None:
            # The 'super' argument is ignored; the CF type is detected here.
            self.super = col_fam.column_type == 'Super'
            if self.autopack_names:
                if not self.super:
                    self.col_name_data_type = col_fam.comparator_type
                else:
                    # Super CFs: subcolumn names use the subcomparator,
                    # super column names use the comparator.
                    self.col_name_data_type = col_fam.subcomparator_type
                    self.supercol_name_data_type = util.extract_type_name(
                        col_fam.comparator_type)

                # Normalize to a bare type name.  (Removed a dead
                # `index = ...` binding that was never read.)
                self.col_name_data_type = util.extract_type_name(
                    self.col_name_data_type)
            if self.autopack_values:
                self.cf_data_type = util.extract_type_name(
                    col_fam.default_validation_class)
                for name, cdef in col_fam.column_metadata.items():
                    self.col_type_dict[name] = util.extract_type_name(
                        cdef.validation_class)