示例#1
0
def test_csr_from_coo_novals(data, nrows, ncols):
    """
    Build a value-less CSR from randomly drawn COO coordinates and check it.

    The test draws a number of non-zero cells (at most 75% of the matrix),
    draws that many distinct flat cell indices, converts them to row/column
    coordinates, and builds a ``CSR`` with ``None`` for the values.  It then
    verifies the reported dimensions and, row by row, that the stored column
    indices match the input coordinates.

    Args:
        data: object providing ``draw`` for sampling from strategies
            (presumably a Hypothesis ``data()`` value; confirm against the
            decorators, which are not visible here).
        nrows(int): number of rows in the matrix under test.
        ncols(int): number of columns in the matrix under test.
    """
    total = nrows * ncols
    nnz = data.draw(st.integers(0, int(total * 0.75)))
    _log.info('testing %d×%d (%d nnz) with no values', nrows, ncols, nnz)

    # Distinct flat indices guarantee that no (row, col) pair repeats.
    cell_ids = st.integers(0, max(total - 1, 0))
    flat = data.draw(nph.arrays(np.int32, nnz, elements=cell_ids, unique=True))
    rows = np.mod(flat, nrows, dtype=np.int32)
    cols = np.floor_divide(flat, nrows, dtype=np.int32)

    csr = CSR.from_coo(rows, cols, None, (nrows, ncols))

    rowinds = csr.rowinds()
    assert csr.nrows == nrows
    assert csr.ncols == ncols
    assert csr.nnz == nnz

    for r in range(nrows):
        start = csr.rowptrs[r]
        end = csr.rowptrs[r + 1]
        width = end - start

        # The row extent must hold exactly the input entries of this row.
        in_row = rows == r
        assert width == np.sum(in_row)
        (hits,) = np.nonzero(in_row)
        assert len(hits) == width

        # Compare column indices in sorted order, since the storage order
        # within a row is not assumed.
        expected_cols = cols[hits[np.argsort(cols[hits])]]
        assert all(np.sort(csr.colinds[start:end]) == expected_cols)
        assert all(np.sort(csr.row_cs(r)) == expected_cols)
        assert all(rowinds[start:end] == r)

        # Without values, the dense row is expected to sum to its entry count.
        dense = csr.row(r)
        assert np.sum(dense) == width
示例#2
0
def test_csr_str():
    """Check that the string form of a 4-row, 3-column CSR mentions '4x3'."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    assert '4x3' in str(csr)
示例#3
0
def test_csr_rowinds():
    """Check that ``rowinds()`` reproduces the (already row-sorted) input rows."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
    vals = np.arange(4, dtype=np.float64)
    csr = CSR.from_coo(rows, cols, vals)

    ris = csr.rowinds()
    assert all(ris == rows)
示例#4
0
def test_csr_row_fixed():
    """Check the dense form of each row, including the empty row 2."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
    # Values are 1..4 so stored entries are distinguishable from zeros.
    vals = np.arange(4, dtype=np.float64) + 1

    csr = CSR.from_coo(rows, cols, vals)
    assert all(csr.row(0) == np.array([0, 1, 2], dtype=np.float64))
    assert all(csr.row(1) == np.array([3, 0, 0], dtype=np.float64))
    assert all(csr.row(2) == np.array([0, 0, 0], dtype=np.float64))
    assert all(csr.row(3) == np.array([0, 4, 0], dtype=np.float64))
示例#5
0
def test_csr_row_extent_fixed():
    """Check the (start, end) storage extent reported for each row."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
    vals = np.arange(4, dtype=np.float64) + 1
    csr = CSR.from_coo(rows, cols, vals)

    assert csr.row_extent(0) == (0, 2)
    assert csr.row_extent(1) == (2, 3)
    # Row 2 has no entries, so its extent is empty.
    assert csr.row_extent(2) == (3, 3)
    assert csr.row_extent(3) == (3, 4)
示例#6
0
def test_csr_set_values():
    """Check that assigning a same-length array to ``values`` replaces them."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    v2 = 10 - vals
    csr.values = v2

    assert all(csr.values == v2)
示例#7
0
def test_csr_from_coo_fixed():
    """Make a CSR from fixed COO data and check its dimensions and values."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)
    # No shape is passed, so the dimensions come from the coordinates.
    assert csr.nrows == 4
    assert csr.ncols == 3
    assert csr.nnz == 4
    assert csr.values == approx(vals)
示例#8
0
def test_csr_set_values_none():
    """Check that setting ``values`` to None drops them and rows read as 0/1."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)
    csr.values = None

    assert csr.values is None
    # With no values, stored entries are expected to read as 1.
    assert all(csr.row(0) == [0, 1, 1])
    assert all(csr.row(1) == [1, 0, 0])
    assert all(csr.row(3) == [0, 1, 0])
示例#9
0
def test_csr_set_values_oversize():
    """Check that assigning too many values keeps only the first ``nnz``."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    # Six values for a matrix with four stored entries.
    v2 = np.arange(6, dtype=np.float64) + 10
    csr.values = v2

    assert csr.values is not None
    assert all(csr.values == v2[:4])
示例#10
0
def test_csr_set_values_undersize():
    """Check that assigning too few values raises and leaves values intact."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)

    # Three values for a matrix with four stored entries.
    v2 = np.arange(3, dtype=np.float64) + 5

    with raises(ValueError):
        csr.values = v2

    # The failed assignment must not have modified the matrix.
    assert all(csr.values == vals)
示例#11
0
def test_csr_sparse_row():
    """Check the per-row column indices (``row_cs``) and values (``row_vs``)."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)
    assert all(csr.row_cs(0) == np.array([1, 2], dtype=np.int32))
    assert all(csr.row_cs(1) == np.array([0], dtype=np.int32))
    # Row 2 is empty, so both accessors should yield empty arrays.
    assert all(csr.row_cs(2) == np.array([], dtype=np.int32))
    assert all(csr.row_cs(3) == np.array([1], dtype=np.int32))

    assert all(csr.row_vs(0) == np.array([0, 1], dtype=np.float64))
    assert all(csr.row_vs(1) == np.array([2], dtype=np.float64))
    assert all(csr.row_vs(2) == np.array([], dtype=np.float64))
    assert all(csr.row_vs(3) == np.array([3], dtype=np.float64))
示例#12
0
def test_csr_transpose_coords():
    """Check ``transpose(False)``: swapped shape, no values, same coordinates."""
    rows = np.array([0, 0, 1, 3], dtype=np.int32)
    cols = np.array([1, 2, 0, 1], dtype=np.int32)
    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
    vals = np.arange(4, dtype=np.float64)

    csr = CSR.from_coo(rows, cols, vals)
    csc = csr.transpose(False)
    assert csc.nrows == csr.ncols
    assert csc.ncols == csr.nrows

    assert all(csc.rowptrs == [0, 1, 3, 4])
    assert csc.colinds.max() == 3
    assert csc.values is None

    # Every original (row, col) entry must appear at (col, row) in the
    # transpose; without values, present entries are expected to read as 1.
    for r, c in zip(rows, cols):
        row = csc.row(c)
        assert row[r] == 1
示例#13
0
文件: matrix.py 项目: keener101/lkpy
def sparse_ratings(ratings, scipy=False, *, users=None, items=None):
    """
    Convert a rating table to a sparse matrix of ratings.

    Rows of the matrix correspond to positions in the user index and columns
    to positions in the item index.  If ``ratings`` has no ``rating`` column,
    the matrix is built without explicit values (or with ones, for the SciPy
    COO format, which requires a data array).

    Args:
        ratings(pandas.DataFrame): a data table of (user, item, rating) triples.
        scipy(bool):
            if ``True`` or ``'csr'``, return a SciPy csr matrix instead of
            :py:class:`CSR`. if ``'coo'``, return a SciPy coo matrix.
        users(pandas.Index): an index of user IDs.
        items(pandas.Index): an index of items IDs.

    Returns:
        RatingMatrix:
            a named tuple containing the sparse matrix, user index, and item index.

    Raises:
        ValueError:
            if a supplied ``users`` or ``items`` index does not contain every
            user or item that appears in ``ratings``.
    """
    if users is None:
        users = pd.Index(np.unique(ratings.user), name='user')

    if items is None:
        items = pd.Index(np.unique(ratings.item), name='item')

    _log.debug('creating matrix with %d ratings for %d items by %d users',
               len(ratings), len(items), len(users))

    # get_indexer yields -1 for labels missing from the index.
    row_ind = users.get_indexer(ratings.user).astype(np.intc)
    if np.any(row_ind < 0):
        raise ValueError('provided user index does not cover all users')
    col_ind = items.get_indexer(ratings.item).astype(np.intc)
    if np.any(col_ind < 0):
        raise ValueError('provided item index does not cover all items')

    if 'rating' in ratings.columns:
        vals = np.require(ratings.rating.values, np.float64)
    else:
        vals = None

    shape = (len(users), len(items))
    if scipy == 'coo':
        # SciPy needs a concrete data array; use 1 for each observed pair
        # when the table carries no rating column.
        if vals is None:
            coo_vals = np.ones(len(row_ind), dtype=np.float64)
        else:
            coo_vals = vals
        matrix = sps.coo_matrix((coo_vals, (row_ind, col_ind)), shape=shape)
    else:
        matrix = CSR.from_coo(row_ind, col_ind, vals, shape)
        if scipy:
            matrix = matrix.to_scipy()

    return RatingMatrix(matrix, users, items)
示例#14
0
def test_csr_from_coo(data, nrows, ncols, dtype):
    """
    Build a CSR with values from randomly drawn COO data and check it.

    The test draws a number of non-zero cells (at most 75% of the matrix),
    that many distinct flat cell indices, and a finite value of the requested
    dtype for each.  After building the ``CSR`` it verifies the reported
    dimensions and, row by row, that column indices, values, row indices and
    the dense row all agree with the input.

    Args:
        data: object providing ``draw`` for sampling from strategies
            (presumably a Hypothesis ``data()`` value; confirm against the
            decorators, which are not visible here).
        nrows(int): number of rows in the matrix under test.
        ncols(int): number of columns in the matrix under test.
        dtype: anything accepted by ``np.dtype``; the element type of the values.
    """
    dtype = np.dtype(dtype)
    total = nrows * ncols
    nnz = data.draw(st.integers(0, int(total * 0.75)))
    _log.debug('testing %d×%d (%d nnz) of type %s', nrows, ncols, nnz, dtype)

    # Distinct flat indices guarantee that no (row, col) pair repeats.
    cell_ids = st.integers(0, max(total - 1, 0))
    flat = data.draw(nph.arrays(np.int32, nnz, elements=cell_ids, unique=True))
    rows = np.mod(flat, nrows, dtype=np.int32)
    cols = np.floor_divide(flat, nrows, dtype=np.int32)

    # NaN and infinities are excluded so equality comparisons are meaningful.
    finite = nph.from_dtype(dtype, allow_infinity=False, allow_nan=False)
    vals = data.draw(nph.arrays(dtype, nnz, elements=finite))

    csr = CSR.from_coo(rows, cols, vals, (nrows, ncols))

    rowinds = csr.rowinds()
    assert csr.nrows == nrows
    assert csr.ncols == ncols
    assert csr.nnz == nnz

    for r in range(nrows):
        start = csr.rowptrs[r]
        end = csr.rowptrs[r + 1]

        # The row extent must hold exactly the input entries of this row.
        in_row = rows == r
        assert end - start == np.sum(in_row)
        (hits,) = np.nonzero(in_row)
        assert len(hits) == end - start

        # Put the input entries of this row into column order, and compare
        # against the stored entries in column order as well.
        hits = hits[np.argsort(cols[hits])]
        stored_cols = csr.colinds[start:end]
        assert all(np.sort(stored_cols) == cols[hits])
        assert all(np.sort(csr.row_cs(r)) == cols[hits])
        stored_order = np.argsort(stored_cols) + start
        assert all(csr.values[stored_order] == vals[hits])
        assert all(rowinds[start:end] == r)

        # Reconstruct the expected dense row from the input triples.
        dense = np.zeros(ncols, dtype)
        dense[cols[hits]] = vals[hits]
        assert all(csr.row(r) == dense)