示例#1
0
    def _compare_value(self, other, op):
        """
        visible: False

        desc: |
            Selects the rows for which comparing the cell value to a single
            reference value gives a truthy result. A cell for which the
            comparison raises an exception is treated as a non-match.

        arguments:
            other:  The reference value that every cell is compared to.
            op:     A callable that is called as op(cell, other).

        returns:
            The result of passing the matching row ids to
            self._datamatrix._selectrowid().
        """

        _rowid = Index(0)
        for rowid, val in zip(self._rowid, self._seq):
            try:
                if op(val, other):
                    _rowid.append(rowid)
            except Exception:
                # Best effort. Values that cannot be compared simply do not
                # match. Only Exception is caught so that KeyboardInterrupt
                # and SystemExit are no longer swallowed.
                pass
        return self._datamatrix._selectrowid(_rowid)
示例#2
0
def _upgrade_datamatrix_index(dm):
    """Fixes the Index object of deprecated versions of DataMatrix.

    The row index of `dm` and of each of its columns is replaced by a newly
    constructed Index. A row index that keeps its values in an `_l` attribute
    is unwrapped first. Any other row index is passed to Index as it is, so
    that the DataMatrix itself and its columns are treated the same way.

    The DataMatrix is modified in place and also returned.
    """

    from datamatrix._datamatrix._index import Index

    def _upgraded(rowid):
        # Use the `_l` attribute when the old index has one, and otherwise
        # fall back to the index object itself.
        return Index(getattr(rowid, '_l', rowid))

    # object.__setattr__ is kept from the original code.
    # NOTE(review): presumably this bypasses a custom __setattr__ - confirm.
    object.__setattr__(dm, '_rowid', _upgraded(dm._rowid))
    for _colname, col in dm.columns:
        object.__setattr__(col, '_rowid', _upgraded(col._rowid))
    return dm
示例#3
0
    def _compare_sequence(self, other, op):
        """
        visible: False

        desc: |
            Selects the rows for which comparing the cell value to the
            corresponding element of a sequence gives a truthy result. The
            sequence is first passed through self._tosequence(). A cell for
            which the comparison raises an exception is treated as a
            non-match.

        arguments:
            other:  The sequence of reference values.
            op:     A callable that is called as op(cell, reference).

        returns:
            The result of passing the matching row ids to
            self._datamatrix._selectrowid().
        """

        _rowid = Index(0)
        for rowid, val, ref in zip(self._rowid, self._seq,
                                   self._tosequence(other)):
            try:
                if op(val, ref):
                    _rowid.append(rowid)
            except Exception:
                # Best effort. Values that cannot be compared simply do not
                # match. Only Exception is caught so that KeyboardInterrupt
                # and SystemExit are no longer swallowed.
                pass
        return self._datamatrix._selectrowid(_rowid)
示例#4
0
    def _compare_set(self, other, op):
        """
        visible: False

        desc: |
            Selects rows by comparing each cell to a collection of values.
            With == a row matches when the cell equals at least one element
            of the collection. With != a row matches when the cell differs
            from every element. A cell for which the comparison raises an
            exception is treated as a non-match.

        arguments:
            other:  An iterable of reference values.
            op:     Either operator.__eq__ or operator.__ne__.

        returns:
            The result of passing the matching row ids to
            self._datamatrix._selectrowid().

        raises:
            TypeError:  When op is neither == nor !=.
        """

        if op == operator.__eq__:
            def test(val):
                return any(val == v for v in other)
        elif op == operator.__ne__:
            def test(val):
                return all(val != v for v in other)
        else:
            raise TypeError('sets can only be compared with == or !=')
        _rowid = Index(0)
        for rowid, val in zip(self._rowid, self._seq):
            try:
                if test(val):
                    _rowid.append(rowid)
            except Exception:
                # Best effort. Values that cannot be compared simply do not
                # match. Only Exception is caught so that KeyboardInterrupt
                # and SystemExit are no longer swallowed.
                pass
        return self._datamatrix._selectrowid(_rowid)
示例#5
0
    def _merge(self, other, _rowid):
        """
        visible: False

        desc: |
            Builds a new column from this column and another column. For
            every requested row id, the value is taken from this column when
            the row id occurs in this column, and from the other column
            otherwise.

        arguments:
            other:  Another column.
            _rowid: A list of row ids to select.

        returns:
            type: BaseColumn
        """

        merged = self._empty_col()
        merged._rowid = Index(_rowid)
        values = []
        for rowid in _rowid:
            # This column takes precedence over the other column
            source = self if rowid in self._rowid else other
            values.append(source._seq[source._rowid.index(rowid)])
        merged._seq = values
        return merged
示例#6
0
    def _merge(self, other, _rowid):
        """
        visible: False

        desc: |
            Builds a new column from this column and another column. For
            every requested row id, the value is taken from this column when
            the row id occurs in this column, and from the other column
            otherwise.

        arguments:
            other:  Another column.
            _rowid: A list of row ids to select.

        returns:
            type: BaseColumn
        """

        merged = self._empty_col()
        merged._rowid = Index(_rowid)
        values = [None] * len(_rowid)
        # A set is used for the membership test inside the loop
        own_rowids = set(self._rowid)
        for pos, rowid in enumerate(_rowid):
            if rowid in own_rowids:
                values[pos] = self._seq[self._rowid.index(rowid)]
            else:
                values[pos] = other._seq[other._rowid.index(rowid)]
        merged._seq = values
        return merged
示例#7
0
    def _setlength(self, value):
        """
        visible: False

        desc: |
            Resizes the current DataMatrix to the given number of rows. When
            the new length is smaller than the current length, the row index
            and all columns are sliced. Otherwise new row ids are generated
            and added to the row index and to all columns.

            *This modifies the current DataMatrix.*

            __Note__: The preferred way to change the length is by setting the
            length property:

            ~~~
            dm.length = 10
            ~~~

        arguments:
            value:
                desc: The new length.
                type: int
        """

        current = len(self)
        if value < current:
            object.__setattr__(self, u'_rowid', self._rowid[:value])
            for name, col in self._cols.items():
                self._cols[name] = col[:value]
        else:
            # New row ids continue after the highest existing row id, or
            # start at 0 when there are no rows yet
            first_new = self._rowid.max + 1 if current else 0
            extra = Index([n + first_new for n in range(value - current)])
            object.__setattr__(self, u'_rowid', self._rowid.copy() + extra)
            for col in self._cols.values():
                col._addrowid(extra)
        self._mutate()
示例#8
0
    def __init__(self, length=0, default_col_type=MixedColumn, **columns):
        """
        desc:
            Constructor.

        keywords:
            length:
                desc: The starting length of the DataMatrix.
                type: int
            default_col_type:
                desc: Stored on the object as _default_col_type.

        keyword-dict:
            columns:
                Columns can be initialized by passing them as keywords, where
                the keyword is the column name, and the value is the initial
                value for the column.
        """

        global _id
        try:
            length = int(length)
        except ValueError:
            raise TypeError('length should be an integer')
        # Internal attributes are written through object.__setattr__.
        # NOTE(review): presumably the class defines its own __setattr__ for
        # column assignment - confirm.
        set_internal = object.__setattr__
        set_internal(self, u'_cols', OrderedDict())
        set_internal(self, u'_rowid', Index(length))
        set_internal(self, u'_default_col_type', default_col_type)
        set_internal(self, u'_id', _id)
        set_internal(self, u'_sorted', True)
        # Each new object gets the next value of the module-level counter
        _id += 1
        for column_name in columns:
            self[column_name] = columns[column_name]
示例#9
0
def shuffle(obj):
    """
    desc: |
        Shuffles a DataMatrix or a column. If a DataMatrix is shuffled, the
        order of the rows is shuffled, but values that were in the same row
        will stay in the same row.

        __Example:__

        %--
        python: |
         from datamatrix import DataMatrix, operations

         dm = DataMatrix(length=5)
         dm.A = 'a', 'b', 'c', 'd', 'e'
         dm.B = operations.shuffle(dm.A)
         print(dm)
        --%

    arguments:
        obj:
            type: [DataMatrix, BaseColumn]

    returns:
        desc: The shuffled DataMatrix or column.
        type: [DataMatrix, BaseColumn]
    """

    # Shuffle a copy of the row index so that obj itself is left untouched
    shuffled = Index(obj._rowid)
    random.shuffle(shuffled)
    if not isinstance(obj, DataMatrix):
        col = obj._getrowidkey(shuffled)
        # The reordered values are placed back on the row index of obj
        col._rowid = obj._rowid
        return col
    return obj._selectrowid(shuffled)
示例#10
0
def realdata():
    """Reads data/real-data.pkl, prints its length, and returns it.

    If the row index of the unpickled object is still a plain list, it is
    replaced by an Index object first.
    """

    dm = io.readpickle('data/real-data.pkl')
    has_list_index = isinstance(dm._rowid, list)
    if has_list_index:
        # Hack to convert a list-style row index of a buffered DataMatrix to
        # the new Index object
        from datamatrix._datamatrix._index import Index
        object.__setattr__(dm, u'_rowid', Index(dm._rowid))
    print(len(dm))
    return dm
示例#11
0
def filter_(fnc, obj):
    """
    desc: |
        Filters rows from a datamatrix or column based on filter function
        (`fnc`).

        If `obj` is a column, `fnc` should be a function that accepts a single
        value. If `obj` is a datamatrix, `fnc` should be a function that
        accepts a keyword `dict`, where column names are keys and cells are
        values. In both cases, `fnc` should return a `bool` indicating whether
        the row or value should be included.

        *New in v0.8.0*: You can also directly compare a column with a function
        or `lambda` expression. However, this is different from `filter_()` in
        that it returns a datamatrix object and not a column.

        __Example:__

        %--
        python: |
         from datamatrix import DataMatrix, functional as fnc

         dm = DataMatrix(length=5)
         dm.col = range(5)
         # Create a column with only odd values
         col_new = fnc.filter_(lambda x: x % 2, dm.col)
         print(col_new)
         # Create a new datamatrix with only odd values in col
         dm_new = fnc.filter_(lambda **d: d['col'] % 2, dm)
         print(dm_new)
        --%

    arguments:
        fnc:
            desc: A filter function.
            type: callable
        obj:
            desc: A datamatrix or column to filter.
            type: [BaseColumn, DataMatrix]

    returns:
        desc: A new column or datamatrix.
        type: [BaseColumn, DataMatrix]
    """

    if not callable(fnc):
        raise TypeError('fnc should be callable')
    if isinstance(obj, BaseColumn):
        # Comparing a column to a function gives a datamatrix, from which
        # the column is retrieved again by name
        return (obj == fnc)[obj.name]
    if not isinstance(obj, DataMatrix):
        raise TypeError(u'obj should be DataMatrix or BaseColumn')
    kept = []
    for rowid, row in zip(obj._rowid, obj):
        # Each row is passed to fnc as keyword arguments
        if fnc(**{col: val for col, val in row}):
            kept.append(rowid)
    return obj._selectrowid(Index(kept))
示例#12
0
    def __eq__(self, other):
        """
        visible: False

        desc: |
            Implements the == operator. When compared to a type, the result
            is self._datamatrix if the type is the dtype of this column, and
            a selection based on Index(0) otherwise. Sequences are compared
            by the parent class. Any other value is also compared by the
            parent class, except that a TypeError results in a comparison
            that matches nothing.

        arguments:
            other:  The type, sequence, or value to compare to.

        returns:
            The result of the comparison, as given by self._datamatrix, its
            _selectrowid() method, or the parent class.
        """

        if isinstance(other, type):
            if other is not self.dtype:
                return self._datamatrix._selectrowid(Index(0))
            return self._datamatrix
        parent_eq = super(IntColumn, self).__eq__
        if self._issequence(other):
            # For sequences, a TypeError from the parent class is not caught
            return parent_eq(other)
        try:
            return parent_eq(other)
        except TypeError:
            # If the other value is not an int, then nothing is equal to it
            def match_nothing(x, y):
                return np.zeros(len(self._datamatrix))

            return self._compare_value(0, match_nothing)
示例#13
0
    def _sortedrowid(self):
        """
        visible: False

        desc:
            Gives the row ids in the order that sorts the column, where the
            cell values are passed through sortable() to obtain the sort keys.

        returns:
            An Index with the ordered row ids.
        """

        pairs = zip(self._seq, self._rowid)
        ordered = sorted(pairs, key=lambda pair: sortable(pair[0]))
        return Index([pair[1] for pair in ordered])
示例#14
0
    def _compare_function(self, other, op):
        """
        visible: False

        desc: |
            Selects rows by calling a function on each cell. With == a row
            matches when the function returns a truthy value for the cell.
            With != a row matches when the function returns a falsy value.

        arguments:
            other:  A function that takes exactly one argument.
            op:     Either operator.__eq__ or operator.__ne__.

        returns:
            The result of passing the matching row ids to
            self._datamatrix._selectrowid().

        raises:
            TypeError:  When op is neither == nor !=, or when the function
                        does not take exactly one argument.
        """

        if op == operator.__eq__:
            test = other
        elif op == operator.__ne__:
            def test(val):
                return not other(val)
        else:
            raise TypeError('functions can only be compared with == or !=')
        # inspect.getargspec() was removed in Python 3.11. The replacement
        # getfullargspec() also provides `args`, and getargspec() is kept as
        # a fallback for interpreters that lack getfullargspec().
        try:
            getargspec = inspect.getfullargspec
        except AttributeError:
            getargspec = inspect.getargspec
        if len(getargspec(other).args) != 1:
            raise TypeError('function must take exactly one argument')
        return self._datamatrix._selectrowid(
            Index([
                rowid for rowid, val in zip(self._rowid, self._seq)
                if test(val)
            ]))
示例#15
0
    def _compare_nan(self, other, op):
        """
        visible: False

        desc: |
            Selects rows based on whether the cell is nan according to
            math.isnan(). With == the rows with nan cells are selected, and
            with != the rows with other cells are selected.

        arguments:
            other:  Not used by this method.
            op:     Either operator.eq or operator.ne.

        returns:
            The result of passing the matching row ids to
            self._datamatrix._selectrowid().

        raises:
            TypeError:  When op is neither == nor !=.
        """

        selected = Index(0)
        if op is operator.eq:
            want_nan = True
        elif op is operator.ne:
            want_nan = False
        else:
            raise TypeError('nans can only be compared with == or !=')
        for rowid, val in zip(self._rowid, self._seq):
            if math.isnan(val) == want_nan:
                selected.append(rowid)
        return self._datamatrix._selectrowid(selected)
示例#16
0
    def _compare_type(self, type_, op):
        """
        visible: False

        desc: |
            Selects rows based on the type of the cell. With == the rows are
            selected for which the cell is an instance of the given type, and
            with != the rows for which it is not.

        arguments:
            type_:  The type that is passed to isinstance().
            op:     Either operator.eq or operator.ne.

        returns:
            The result of passing the matching row ids to
            self._datamatrix._selectrowid().

        raises:
            TypeError:  When op is neither == nor !=.
        """

        selected = Index(0)
        if op is operator.eq:
            want_instance = True
        elif op is operator.ne:
            want_instance = False
        else:
            raise TypeError('types can only be compared with == or !=')
        for rowid, val in zip(self._rowid, self._seq):
            if isinstance(val, type_) == want_instance:
                selected.append(rowid)
        return self._datamatrix._selectrowid(selected)
示例#17
0
    def _getsequencekey(self, key):
        """
        visible: False

        desc:
            Gets a selection of this column, where the positions to select
            are given by a list or some other iterable.

        arguments:
            key:    A list or other iterable object with positions.

        returns:
            BaseColumn
        """

        picked = self._empty_col()
        picked._rowid = Index()
        picked._seq = []
        for pos in key:
            # Row id and value are taken from the same position
            picked._rowid.append(self._rowid[pos])
            picked._seq.append(self._seq[pos])
        return picked
示例#18
0
def random_sample(obj, k):
    """
    desc: |
        *New in v0.11.0*

        Takes a random sample of `k` rows from a DataMatrix or column. The
        order of the rows in the returned DataMatrix is random.

        __Example:__

        ```python
        from datamatrix import DataMatrix, operations as ops

        dm = DataMatrix(length=5)
        dm.A = 'a', 'b', 'c', 'd', 'e'
        dm = ops.random_sample(dm, k=3)
        print(dm)
        ```

    arguments:
        obj:
            type: [DataMatrix, BaseColumn]
        k:
            type: int

    returns:
        desc: A random sample from a DataMatrix or column.
        type: [DataMatrix, BaseColumn]
    """

    population = list(Index(obj._rowid))
    sampled = random.sample(population, k)
    if not isinstance(obj, DataMatrix):
        col = obj._getrowidkey(sampled)
        # NOTE(review): this assigns the complete row index of obj to a
        # column that was selected with k row ids - confirm that this is
        # intended when k is smaller than the length of obj.
        col._rowid = obj._rowid
        return col
    return obj._selectrowid(sampled)
    def _compare_value(self, other, op):
        """
        visible: False

        desc: |
            Selects the rows for which comparing the cell values to a single
            reference value holds. The reference value is first passed
            through self._checktype(). A nan or infinite reference value is
            matched with np.isnan() or np.isinf(), and only supports == and
            !=. Any other reference value is compared by applying the
            operator to the whole sequence at once.

        arguments:
            other:  The reference value.
            op:     A comparison operator from the operator module.

        returns:
            The result of passing the matching row ids to
            self._datamatrix._selectrowid().

        raises:
            TypeError:  When a nan or infinite reference value is compared
                        with an operator other than == or !=.
        """

        _other = self._checktype(other)
        # NaN is usually not equal to itself. Here equality is implemented as
        # though NaN is equal to itself. This behavior may change in the
        # future.
        # NOTE(review): np.isinf() is True for both positive and negative
        # infinity, so the sign of an infinite reference value is not taken
        # into account - confirm that this is intended.
        if np.isnan(_other):
            special = np.isnan
        elif np.isinf(_other):
            special = np.isinf
        else:
            special = None
        if special is None:
            mask = op(self._seq, _other)
        elif op is operator.eq:
            mask = special(self._seq)
        elif op is operator.ne:
            mask = ~special(self._seq)
        else:
            raise TypeError(u'Cannot compare FloatColumn to %s' % other)
        hits = np.where(mask)[0]
        return self._datamatrix._selectrowid(Index(self._rowid[hits]))
示例#20
0
    def __xor__(self, other):
        """
        visible: False

        desc:
            Implements the ^ operator by merging this column with another
            column, selecting the row ids that occur in only one of the two.

        arguments:
            other:  Another column.

        returns:
            The result of self._merge() for the sorted selection of row ids.
        """

        own_rowids = set(self._rowid)
        other_rowids = set(other._rowid)
        selection = Index(own_rowids.symmetric_difference(other_rowids))
        return self._merge(other, selection.sorted())
    def _compare_sequence(self, other, op):
        """
        visible: False

        desc:
            Selects the rows for which comparing the cell values to a
            sequence holds, where the sequence is first passed through
            self._tosequence() and the operator is applied to both sequences
            at once.

        arguments:
            other:  The sequence of reference values.
            op:     A callable that is called as op(self._seq, sequence).

        returns:
            The result of passing the matching row ids to
            self._datamatrix._selectrowid().
        """

        reference = self._tosequence(other)
        hits = np.where(op(self._seq, reference))
        selected = Index(self._rowid[hits])
        return self._datamatrix._selectrowid(selected)
    def _sortedrowid(self):
        """
        visible: False

        desc:
            Gives the row ids in the order that is given by the argsort()
            method of the cell values.

        returns:
            An Index with the ordered row ids.
        """

        order = self._seq.argsort()
        return Index(self._rowid[order])