Пример #1
0
    def test_itereate_chunks(self):
        """Check iterate_matrix_chunks with and without ``sample_idx``.

        A 2x3 matrix is split with ``chunk_size=1``; the first two yielded
        chunks are compared element-wise against the expected values.
        """
        matrix = numpy.array([[1, 2, 3], [4, 5, 6]])

        # Whole rows: every chunk is expected to hold a single full row.
        expected = [[[1, 2, 3]], [[4, 5, 6]]]
        chunks = list(iterate_matrix_chunks(matrix, chunk_size=1))
        for idx in (0, 1):
            assert numpy.all(chunks[idx] == expected[idx])

        # With sample_idx=1 only the value in column 1 of each row is expected.
        expected = [[2], [5]]
        chunks = list(iterate_matrix_chunks(matrix, chunk_size=1,
                                            sample_idx=1))
        for idx in (0, 1):
            assert numpy.all(chunks[idx] == expected[idx])
Пример #2
0
    def test_itereate_chunks(self):
        """Check iterate_matrix_chunks with and without ``sample_idx``.

        A 2x3 matrix is split with ``chunk_size=1``; the first two yielded
        chunks are compared element-wise against the expected values.
        """
        matrix = numpy.array([[1, 2, 3], [4, 5, 6]])

        # Whole rows: every chunk is expected to hold a single full row.
        expected = [[[1, 2, 3]], [[4, 5, 6]]]
        chunks = list(iterate_matrix_chunks(matrix, chunk_size=1))
        for idx in (0, 1):
            assert numpy.all(chunks[idx] == expected[idx])

        # With sample_idx=1 only the value in column 1 of each row is expected.
        expected = [[2], [5]]
        chunks = list(iterate_matrix_chunks(matrix, chunk_size=1,
                                            sample_idx=1))
        for idx in (0, 1):
            assert numpy.all(chunks[idx] == expected[idx])
Пример #3
0
def _row_value_counter(mat, value, ratio=False, by_chunk=False):
    ndims = len(mat.shape)
    if ndims == 1:
        raise ValueError('The matrix has to have at least 2 dimensions')
    elif ndims == 2:
        axes = 1
    else:
        axes = tuple([i + 1 for i in range(ndims - 1)])

    if by_chunk:
        chunks = iterate_matrix_chunks(mat)
        result = numpy.zeros(mat.shape[0])
        start = 0
        for chunk in chunks:
            chunk_result = _row_value_counter_array(chunk, value, axes)
            end = start + chunk_result.shape[0]
            result[start:end] = chunk_result
            start = end
    else:
        if is_dataset(mat):
            mat = mat[...]
        result = _row_value_counter_array(mat, value, axes)

    if ratio:
        num_items_per_row = reduce(operator.mul, mat.shape[1:], 1)
        result = result / num_items_per_row
    return result
Пример #4
0
def _row_value_counter(mat, value, ratio=False, by_chunk=False):
    ndims = len(mat.shape)
    if ndims == 1:
        raise ValueError('The matrix has to have at least 2 dimensions')
    elif ndims == 2:
        axes = 1
    else:
        axes = tuple([i + 1 for i in range(ndims - 1)])

    if by_chunk:
        chunks = iterate_matrix_chunks(mat)
        result = numpy.zeros(mat.shape[0])
        start = 0
        for chunk in chunks:
            chunk_result = _row_value_counter_array(chunk, value, axes)
            end = start + chunk_result.shape[0]
            result[start:end] = chunk_result
            start = end
    else:
        if is_dataset(mat):
            mat = mat[...]
        result = _row_value_counter_array(mat, value, axes)

    if ratio:
        num_items_per_row = reduce(operator.mul, mat.shape[1:], 1)
        result = result / num_items_per_row
    return result
Пример #5
0
def is_variant(chunks):
    """Flag the rows whose matrix contains at least one value above zero.

    The matrix comes from ``_get_chunks_gt(chunks)`` (presumably genotype
    calls -- confirm) and is walked piece by piece with
    ``iterate_matrix_chunks``. The reduction runs over axes 1 and 2, so each
    piece must have at least three dimensions.

    Returns a 1-D array with one entry per row. It is allocated with numpy's
    default float dtype, so the flags are stored as 1.0 / 0.0 rather than as
    booleans. Rows not covered by any piece are left uninitialized.
    """
    gts = _get_chunks_gt(chunks)
    flags = numpy.empty(gts.shape[0])
    offset = 0
    for block in iterate_matrix_chunks(gts):
        n_rows = block.shape[0]
        # "any positive value" is the same test as "positive count >= 1".
        flags[offset:offset + n_rows] = numpy.any(block > 0, axis=(1, 2))
        offset += n_rows
    return flags
Пример #6
0
def allele_count(chunks, allele=1):
    """Count the observations of ``allele`` in every row (variant).

    The matrix comes from ``_get_chunks_gt(chunks)`` and is walked piece by
    piece with ``iterate_matrix_chunks``. For each row, the entries equal to
    ``allele`` are summed over axes 1 and 2, so each piece must have at
    least three dimensions.

    Returns a 1-D array with one count per row, allocated with numpy's
    default float dtype. Rows not covered by any piece are left
    uninitialized.
    """
    gts = _get_chunks_gt(chunks)
    counts = numpy.empty(gts.shape[0])
    offset = 0
    for block in iterate_matrix_chunks(gts):
        n_rows = block.shape[0]
        matches = block == allele
        counts[offset:offset + n_rows] = numpy.sum(matches, axis=(1, 2))
        offset += n_rows
    return counts
Пример #7
0
def allele_number(chunks):
    """Count the non-missing allele calls in every row (variant).

    The matrix comes from ``_get_chunks_gt(chunks)`` and is walked piece by
    piece with ``iterate_matrix_chunks``. A call is counted when its value
    is zero or greater (missing calls are presumably encoded as negative
    numbers -- confirm). The sum runs over axes 1 and 2, so each piece must
    have at least three dimensions.

    Returns a 1-D array with one count per row, allocated with numpy's
    default float dtype. Rows not covered by any piece are left
    uninitialized.
    """
    gts = _get_chunks_gt(chunks)
    called = numpy.empty(gts.shape[0])
    offset = 0
    for block in iterate_matrix_chunks(gts):
        n_rows = block.shape[0]
        non_missing = block >= 0
        called[offset:offset + n_rows] = numpy.sum(non_missing, axis=(1, 2))
        offset += n_rows
    return called
Пример #8
0
def is_doubleton(chunks, allele=1):
    """Flag the rows (variants) where ``allele`` is observed exactly twice.

    The matrix comes from ``_get_chunks_gt(chunks)`` and is walked piece by
    piece with ``iterate_matrix_chunks``. For each row, the entries equal to
    ``allele`` are summed over axes 1 and 2, so each piece must have at
    least three dimensions.

    Returns a 1-D array with one entry per row. It is allocated with numpy's
    default float dtype, so the flags are stored as 1.0 / 0.0 rather than as
    booleans. Rows not covered by any piece are left uninitialized.
    """
    gts = _get_chunks_gt(chunks)
    flags = numpy.empty(gts.shape[0])
    offset = 0
    for block in iterate_matrix_chunks(gts):
        n_rows = block.shape[0]
        observed = numpy.sum(block == allele, axis=(1, 2))
        flags[offset:offset + n_rows] = observed == 2
        offset += n_rows
    return flags