Exemplo n.º 1
0
def preprocess_hic(
    clr: cooler.Cooler,
    min_contacts: Optional[int] = None,
    region: Optional[str] = None,
) -> sp.csr_matrix:
    """
    Given an input cooler object, returns the preprocessed Hi-C matrix.
    Preprocessing involves (in that order): subsetting region, subsampling
    contacts, normalisation, detrending (obs / exp). Balancing weights must
    be pre-computer in the referenced cool file. Region must be in UCSC format.
    """
    # Load raw matrix and subset region if requested
    mat = clr.matrix(sparse=True, balance=False)
    bins = clr.bins()
    if region is None:
        mat = mat[:]
        bins = bins[:]
    else:
        mat = mat.fetch(region)
        bins = bins.fetch(region)
    try:
        biases = bins["weight"].values
    except KeyError as err:
        sys.stderr.write("Error: Input cooler must be balanced.\n")
        raise err
    # get to same coverage if requested and matrix is not empty
    if mat.sum() and (min_contacts is not None):
        mat = cup.subsample_contacts(mat, min_contacts).tocoo()
    valid = cup.get_detectable_bins(mat, n_mads=5)

    # balance region with weights precomputed on the whole matrix
    mat.data = mat.data * biases[mat.row] * biases[mat.col]
    # Detrend for P(s)
    mat = cup.detrend(mat.tocsr(), smooth=False, detectable_bins=valid[0])
    # Replace NaNs by 0s
    mat.data = np.nan_to_num(mat.data)
    mat.eliminate_zeros()
    return mat
Exemplo n.º 2
0
def test_detrend(matrix):
    """Basic test: Check if detrended matrix pixels have lower standard deviation"""
    detrended = preproc.detrend(matrix)
    for d in range(matrix.shape[0] // 10):
        assert matrix.diagonal(d).std() > detrended.diagonal(d).std()