Exemplo n.º 1
0
    def test_ssl01_scan_int8(self):
        """Compare ``nsl01_scan_int8`` output with hand-written expectations.

        Three small int8 arrays are scanned in turn. For each one, the two
        arrays returned by the scan are compared (NaN-aware) against literal
        expected values.
        """
        from allel.opt.stats import nsl01_scan_int8

        nan = np.nan

        # each entry: (input rows, expected first output, expected second output)
        cases = [
            # four identical rows
            ([[0, 0, 0, 1, 1, 1]] * 4,
             [1, 2, 3, 4],
             [1, 2, 3, 4]),
            # exactly one 1 per row, in a different column each time
            ([[0, 0, 0, 1],
              [0, 0, 1, 0],
              [0, 1, 0, 0],
              [1, 0, 0, 0]],
             [1, 4/3, 4/3, 4/3],
             [nan] * 4),
            # mixed rows: each output is NaN where the other is defined
            ([[0, 0, 1],
              [0, 1, 1],
              [1, 1, 0],
              [1, 0, 0]],
             [1, nan, nan, 1],
             [nan, 1, 1, nan]),
        ]

        for rows, want0, want1 in cases:
            got0, got1 = nsl01_scan_int8(np.array(rows, dtype='i1'))
            assert_array_nanclose(want0, got0)
            assert_array_nanclose(want1, got1)
Exemplo n.º 2
0
def nsl(h):
    """Compute unstandardized nSl (number of segregating sites by length)
    scores for every variant, contrasting the reference and alternate
    alleles, following Ferrer-Admetlla et al. (2014).

    Parameters
    ----------
    h : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array. It must provide a ``count_alleles()`` method.

    Returns
    -------
    score : ndarray, float, shape (n_variants,)
        Natural log of the ratio between the summed forward and backward
        scan results for allele 1 and those for allele 0.

    Notes
    -----
    Every variant in the input is scored. To leave out variants below some
    minor allele frequency, filter the haplotype array before calling this
    function.

    Only segregating sites are accepted; an assertion fails if any
    non-segregating site is present, so remove those beforehand.

    The score compares the reference and alternate alleles. To polarise
    the scores, flip the sign at every variant whose reference allele is
    derived.

    No correction is applied where haplotype homozygosity reaches the first
    or last variant, so edge effects are to be expected there.

    Large gaps between variants are likewise not accounted for at present;
    expect edge effects close to any such gap.

    The scores returned are unstandardised. nSl scores are typically
    normalised by subtracting the mean and dividing by the standard
    deviation.

    """

    from allel.opt.stats import nsl01_scan_int8

    # invariant sites are not supported, so refuse them up front
    allele_counts = h.count_alleles()
    assert np.all(allele_counts.is_segregating()), \
        'please remove non-segregating sites'

    # one pass in the given variant order, one pass over the reversed input
    fwd0, fwd1 = nsl01_scan_int8(h)
    rev0, rev1 = nsl01_scan_int8(h[::-1])

    # flip the reverse-pass results back into the original order, then
    # combine both directions per allele
    total0 = fwd0 + rev0[::-1]
    total1 = fwd1 + rev1[::-1]

    return np.log(total1 / total0)
Exemplo n.º 3
0
def test_nsl01_scan_int8_c():
    """Scan a 4x3 int8 array and check both outputs, NaN positions included.

    The expected arrays are complementary: each one is NaN exactly where
    the other holds 1.
    """
    nan = np.nan
    haplotypes = np.array(
        [
            [0, 0, 1],
            [0, 1, 1],
            [1, 1, 0],
            [1, 0, 0],
        ],
        dtype='i1',
    )

    scan0, scan1 = nsl01_scan_int8(haplotypes)

    assert_array_nanclose([1, nan, nan, 1], scan0)
    assert_array_nanclose([nan, 1, 1, nan], scan1)
Exemplo n.º 4
0
def test_nsl01_scan_int8_a():
    """Scan four identical rows and check both outputs equal 1, 2, 3, 4."""
    # the same row repeated four times
    row = [0, 0, 0, 1, 1, 1]
    haplotypes = np.array([row] * 4, dtype='i1')

    scan0, scan1 = nsl01_scan_int8(haplotypes)

    # both outputs are expected to be the same increasing sequence
    assert_array_nanclose([1, 2, 3, 4], scan0)
    assert_array_nanclose([1, 2, 3, 4], scan1)
Exemplo n.º 5
0
def test_nsl01_scan_int8_a():
    """Check ``nsl01_scan_int8`` on a 4x6 array whose rows are all equal.

    Both returned arrays are expected to be ``[1, 2, 3, 4]``.
    """
    n_rows = 4
    haplotypes = np.array([[0, 0, 0, 1, 1, 1]] * n_rows, dtype='i1')
    expected = [1, 2, 3, 4]

    results = nsl01_scan_int8(haplotypes)
    scan0, scan1 = results

    assert_array_nanclose(expected, scan0)
    assert_array_nanclose(expected, scan1)
Exemplo n.º 6
0
def test_nsl01_scan_int8_c():
    """Check ``nsl01_scan_int8`` on a 4x3 array with mixed rows.

    The first output is expected to be defined only at the first and last
    positions, the second only at the two middle positions; everything
    else is NaN.
    """
    rows = [
        [0, 0, 1],
        [0, 1, 1],
        [1, 1, 0],
        [1, 0, 0],
    ]
    want0 = [1, np.nan, np.nan, 1]
    want1 = [np.nan, 1, 1, np.nan]

    got0, got1 = nsl01_scan_int8(np.array(rows, dtype='i1'))

    assert_array_nanclose(want0, got0)
    assert_array_nanclose(want1, got1)
Exemplo n.º 7
0
def nsl(h, use_threads=True):
    """Compute unstandardized nSl (number of segregating sites by length)
    scores for every variant, contrasting the reference and alternate
    alleles, following Ferrer-Admetlla et al. (2014).

    Parameters
    ----------
    h : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array. It is converted to an int8 ``HaplotypeArray``
        before scanning.
    use_threads : bool, optional
        If True, and more than one CPU is reported, run the forward and
        backward scans on a pool of two threads. Otherwise run them one
        after the other.

    Returns
    -------
    score : ndarray, float, shape (n_variants,)
        Natural log of the ratio between the summed forward and backward
        scan results for allele 1 and those for allele 0.

    Notes
    -----
    Every variant in the input is scored. To leave out variants below some
    minor allele frequency, filter the haplotype array before calling this
    function.

    The score compares the reference and alternate alleles. To polarise
    the scores, flip the sign at every variant whose reference allele is
    derived.

    No correction is applied where haplotype homozygosity reaches the first
    or last variant, so there may be edge effects.

    The score returned is unstandardized. Usually such scores are
    subsequently standardized within different allele frequency bins.

    See Also
    --------
    standardize_by_allele_count

    """

    from allel.opt.stats import nsl01_scan_int8

    # normalise the input to an int8 haplotype array
    h = HaplotypeArray(np.asarray(h, dtype="i1"))

    # NOTE: non-segregating sites are not rejected here.

    # the backward scan is a forward scan over the reversed variants
    h_reversed = h[::-1]

    run_threaded = use_threads and multiprocessing.cpu_count() > 1

    if run_threaded:

        # one worker per scan direction
        pool = ThreadPool(2)
        pending_fwd = pool.apply_async(nsl01_scan_int8, args=(h,))
        pending_rev = pool.apply_async(nsl01_scan_int8, args=(h_reversed,))

        # no further tasks; block until both scans have completed
        pool.close()
        pool.join()

        fwd0, fwd1 = pending_fwd.get()
        rev0, rev1 = pending_rev.get()

    else:

        fwd0, fwd1 = nsl01_scan_int8(h)
        rev0, rev1 = nsl01_scan_int8(h_reversed)

    # flip the backward results into the original variant order, then
    # combine both directions per allele
    total0 = fwd0 + rev0[::-1]
    total1 = fwd1 + rev1[::-1]

    return np.log(total1 / total0)