Python api.Cooler.chroms примеры использования

Язык программирования: Python

Пространство имен/Пакет: cooler

Класс/Тип: api.Cooler

Метод/Функция: chroms

Примеров на hotexamples.com: 3

Python api.Cooler.chroms - 3 примера найдено. Это лучшие примеры Python кода для cooler.api.Cooler.chroms, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

bins(4)

chroms(3)

matrix(2)

pixels(1)

Основные методы

bins (4)

chroms (3)

matrix (2)

pixels (1)

Пример #1

Показать файл

def coolerInfo(cool: cooler.api.Cooler, k: str):
    """Look up a metadata field of a Cooler, deriving it when it is not stored

    The metadata stored with the file (`cool.info`) is consulted first. The
    fields expected there are documented in:
    https://cooler.readthedocs.io/en/latest/schema.html#metadata

    When the key `k` is not stored, the following fields are computed from
    the tables of `cool` instead:

        'sum':     sum of the `count` column of the pixel table
        'nbins':   number of rows of the bin table
        'nnz':     number of rows of the pixel table
        'nchroms': number of rows of the chromosome table

    Args:
        cool (cooler.api.Cooler): Input Cooler object
        k (str): Key of the metadata field
    Returns: Requested metadata
    Raises:
        KeyError: `k` is neither stored in `cool.info` nor one of the
        computable fields listed above
    """
    # Stored metadata always wins over a recomputed value.
    if k in cool.info:
        return cool.info[k]

    # Each fallback is wrapped in a lambda so that only the table that is
    # actually needed gets touched.
    fallbacks = {
        'sum': lambda: cool.pixels()['count'][:].sum(),
        'nbins': lambda: cool.bins().shape[0],
        'nnz': lambda: cool.pixels().shape[0],
        'nchroms': lambda: cool.chroms().shape[0],
    }
    compute = fallbacks.get(k)
    if compute is None:
        raise KeyError(f"Unable to retrieve metadata field '{k}'")
    return compute()

Пример #2

Показать файл

def hicrepSCC(cool1: cooler.api.Cooler,
              cool2: cooler.api.Cooler,
              h: int,
              dBPMax: int,
              bDownSample: bool,
              chrNames: list = None,
              excludeChr: set = None):
    """Compute hicrep scores between two input Cooler contact matrices

    Args:
        cool1: `cooler.api.Cooler` Input Cooler contact matrix 1
        cool2: `cooler.api.Cooler` Input Cooler contact matrix 2
        h: `int` Half-size of the mean filter used to smooth the
        input matrices. No smoothing is applied when `h` <= 0
        dBPMax: `int` Only include contacts that are at most this genomic
        distance (bp) away. -1 means no limit (all diagonals are used)
        bDownSample: `bool` Down sample the input with more contacts
        to the same number of contacts as in the other input. When False,
        each matrix is divided by its total number of contacts instead
        chrNames: `list` List of chromosome names whose SCC to
        compute. Default to None, which means all chromosomes in the
        genome are used to compute SCC. Must not contain duplicates
        excludeChr: `set` Set of chromosome names to exclude from SCC
        computation. Default to None.

    Returns:
        `numpy.ndarray` of `float`, one SCC score per selected chromosome,
        in the order of `chrNames` (or of the chromosome table of `cool1`
        when `chrNames` is None) with the excluded chromosomes removed

    Raises:
        AssertionError: the two inputs are not comparable (bin size, number
        of bins, number of chromosomes or chromosome table differ),
        `dBPMax` is smaller than the bin size, `chrNames` has duplicates,
        or a per-chromosome contact matrix is empty, not square, or shaped
        differently between the two inputs
    """
    binSize1 = cool1.binsize
    binSize2 = cool2.binsize
    assert binSize1 == binSize2,\
        "Input cool files have different bin sizes"
    # Fetched once: reused below as the exclusive upper bound on diagonals.
    nBins1 = coolerInfo(cool1, 'nbins')
    assert nBins1 == coolerInfo(cool2, 'nbins'),\
        "Input cool files have different number of bins"
    assert coolerInfo(cool1, 'nchroms') == coolerInfo(cool2, 'nchroms'),\
        "Input cool files have different number of chromosomes"
    # Fetched once: reused below for the default list of chromosome names.
    chroms1 = cool1.chroms()[:]
    # DataFrame.all() yields a Series labelled by column name, so the first
    # column has to be picked positionally with .iloc; a bare [0] relies on
    # the deprecated integer-key fallback of Series.__getitem__.
    assert (chroms1 == cool2.chroms()[:]).all().iloc[0],\
        "Input file have different chromosome names"
    binSize = binSize1
    bins1 = cool1.bins()
    bins2 = cool2.bins()
    if binSize is None:
        # sometimes bin size can be None, e.g., input cool file has
        # non-uniform size bins.
        # Materialise the bin table once instead of once per use.
        binTable1 = bins1[:]
        assert np.all(binTable1 == bins2[:]),\
            "Input cooler files don't have a unique bin size most likely "\
            "because non-uniform bin size was used and the bins are defined "\
            "differently for the two input cooler files"
        # In that case, use the median bin size
        binSize = int(np.median((binTable1["end"] - binTable1["start"]).values))
        warnings.warn("Input cooler files don't have a unique bin size most "
                      "likely because non-uniform bin size was used. HicRep "
                      "will use median bin size from the first cooler file "
                      "to determine maximal diagonal index to include", RuntimeWarning)
    if dBPMax == -1:
        # this is the exclusive upper bound
        dMax = nBins1
    else:
        dMax = dBPMax // binSize + 1
    assert dMax > 1, "Input dBPmax is smaller than binSize"
    p1 = cool2pixels(cool1)
    p2 = cool2pixels(cool2)
    # get the total number of contacts as normalizing constant
    n1 = coolerInfo(cool1, 'sum')
    n2 = coolerInfo(cool2, 'sum')
    # Use dict here so that the chrNames don't duplicate
    if chrNames is None:
        chrNamesDict = dict.fromkeys(chroms1['name'].tolist())
    else:
        chrNamesDict = dict.fromkeys(chrNames)
    # It's important to preserve the order of the input chrNames so that the
    # user knows the order of the output SCC scores so we bail when encounter
    # duplicate names rather than implicitly pruning the names.
    assert chrNames is None or len(chrNamesDict) == len(chrNames), f"""
        Found Duplicates in {chrNames}. Please remove them.
        """
    # filter out excluded chromosomes
    if excludeChr is None:
        excludeChr = set()
    chrNames = [
        chrName for chrName in chrNamesDict if chrName not in excludeChr
    ]
    # -2.0 is a placeholder; every entry is overwritten in the loop below
    scc = np.full(len(chrNames), -2.0)
    for iChr, chrName in enumerate(chrNames):
        mS1 = getSubCoo(p1, bins1, chrName)
        assert mS1.size > 0, "Contact matrix 1 of chromosome %s is empty" % (
            chrName)
        assert mS1.shape[0] == mS1.shape[1],\
            "Contact matrix 1 of chromosome %s is not square" % (chrName)
        mS2 = getSubCoo(p2, bins2, chrName)
        assert mS2.size > 0, "Contact matrix 2 of chromosome %s is empty" % (
            chrName)
        assert mS2.shape[0] == mS2.shape[1],\
            "Contact matrix 2 of chromosome %s is not square" % (chrName)
        assert mS1.shape == mS2.shape,\
            "Contact matrices of chromosome %s have different input shape" % (chrName)
        # never ask for more diagonals than the chromosome matrix has
        nDiags = mS1.shape[0] if dMax < 0 else min(dMax, mS1.shape[0])
        # remove major diagonal and all the diagonals >= nDiags
        # to save computation time
        m1 = trimDiags(mS1, nDiags, False)
        m2 = trimDiags(mS2, nDiags, False)
        # release the untrimmed matrices before the next allocations
        del mS1
        del mS2
        if bDownSample:
            # do downsampling: the matrix with more contacts is resampled
            # down to the contact count of the other one
            size1 = m1.sum()
            size2 = m2.sum()
            if size1 > size2:
                m1 = resample(m1, size2).astype(float)
            elif size2 > size1:
                m2 = resample(m2, size1).astype(float)
        else:
            # just normalize by total contacts
            m1 = m1.astype(float) / n1
            m2 = m2.astype(float) / n2
        if h > 0:
            # apply smoothing
            m1 = meanFilterSparse(m1, h)
            m2 = meanFilterSparse(m2, h)
        scc[iChr] = sccByDiag(m1, m2, nDiags)
    return scc

Пример #3

Показать файл

Файл: hicrep.py Проект: justin-a-sanders/hicrep

def hicrepSCC(cool1: cooler.api.Cooler, cool2: cooler.api.Cooler, h: int,
              dBPMax: int, bDownSample: bool):
    """Compute hicrep scores between two input Cooler contact matrices

    Args:
        cool1: `cooler.api.Cooler` Input Cooler contact matrix 1
        cool2: `cooler.api.Cooler` Input Cooler contact matrix 2
        h: `int` Half-size of the mean filter used to smooth the
        input matrices. No smoothing is applied when `h` <= 0
        dBPMax: `int` Only include contacts that are at most this genomic
        distance (bp) away. -1 means no limit (all diagonals are used)
        bDownSample: `bool` Down sample the input with more contacts
        to the same number of contacts as in the other input. When False,
        each matrix is divided by its total number of contacts instead

    Returns:
        `numpy.ndarray` of `float`, one SCC score per chromosome of `cool1`
        in chromosome-table order, skipping the chromosome named 'M'

    Raises:
        AssertionError: the two inputs are not comparable (bin size, number
        of bins, number of chromosomes or chromosome table differ),
        `dBPMax` is smaller than the bin size, or a per-chromosome contact
        matrix is empty, not square, or shaped differently between the two
        inputs
    """
    binSize1 = cool1.binsize
    binSize2 = cool2.binsize
    assert binSize1 == binSize2,\
        "Input cool files have different bin sizes"
    # Fetched once: reused below as the exclusive upper bound on diagonals.
    nBins1 = coolerInfo(cool1, 'nbins')
    assert nBins1 == coolerInfo(cool2, 'nbins'),\
        "Input cool files have different number of bins"
    assert coolerInfo(cool1, 'nchroms') == coolerInfo(cool2, 'nchroms'),\
        "Input cool files have different number of chromosomes"
    # Fetched once: reused below for the list of chromosome names.
    chroms1 = cool1.chroms()[:]
    # DataFrame.all() yields a Series labelled by column name, so the first
    # column has to be picked positionally with .iloc; a bare [0] relies on
    # the deprecated integer-key fallback of Series.__getitem__.
    assert (chroms1 == cool2.chroms()[:]).all().iloc[0],\
        "Input file have different chromosome names"
    binSize = binSize1
    bins1 = cool1.bins()
    bins2 = cool2.bins()
    if binSize is None:
        # sometimes bin size can be None, e.g., input cool file has
        # non-uniform size bins.
        # Materialise the bin table once instead of once per use.
        binTable1 = bins1[:]
        assert np.all(binTable1 == bins2[:]),\
            "Input cooler files don't have a unique bin size most likely "\
            "because non-uniform bin size was used and the bins are defined "\
            "differently for the two input cooler files"
        # In that case, use the median bin size
        binSize = int(np.median((binTable1["end"] - binTable1["start"]).values))
        warnings.warn("Input cooler files don't have a unique bin size most "
                      "likely because non-uniform bin size was used. HicRep "
                      "will use median bin size from the first cooler file "
                      "to determine maximal diagonal index to include", RuntimeWarning)
    if dBPMax == -1:
        # this is the exclusive upper bound
        dMax = nBins1
    else:
        dMax = dBPMax // binSize + 1
    assert dMax > 1, "Input dBPmax is smaller than binSize"
    p1 = cool2pixels(cool1)
    p2 = cool2pixels(cool2)
    # get the total number of contacts as normalizing constant
    n1 = coolerInfo(cool1, 'sum')
    n2 = coolerInfo(cool2, 'sum')
    chrNames = chroms1['name'].to_numpy()
    # filter out mitochondria chromosome
    chrNames = np.array([name for name in chrNames if name != 'M'])
    # -2.0 is a placeholder; every entry is overwritten in the loop below
    scc = np.full(chrNames.shape[0], -2.0)
    for iChr, chrName in enumerate(chrNames):
        mS1 = getSubCoo(p1, bins1, chrName)
        assert mS1.size > 0, "Contact matrix 1 of chromosome %s is empty" % (
            chrName)
        assert mS1.shape[0] == mS1.shape[1],\
            "Contact matrix 1 of chromosome %s is not square" % (chrName)
        mS2 = getSubCoo(p2, bins2, chrName)
        assert mS2.size > 0, "Contact matrix 2 of chromosome %s is empty" % (
            chrName)
        assert mS2.shape[0] == mS2.shape[1],\
            "Contact matrix 2 of chromosome %s is not square" % (chrName)
        assert mS1.shape == mS2.shape,\
            "Contact matrices of chromosome %s have different input shape" % (chrName)
        # never ask for more diagonals than the chromosome matrix has
        nDiags = mS1.shape[0] if dMax < 0 else min(dMax, mS1.shape[0])
        # remove major diagonal and all the diagonals >= nDiags
        # to save computation time
        m1 = trimDiags(mS1, nDiags, False)
        m2 = trimDiags(mS2, nDiags, False)
        # release the untrimmed matrices before the next allocations
        del mS1
        del mS2
        if bDownSample:
            # do downsampling: the matrix with more contacts is resampled
            # down to the contact count of the other one
            size1 = m1.sum()
            size2 = m2.sum()
            if size1 > size2:
                m1 = resample(m1, size2).astype(float)
            elif size2 > size1:
                m2 = resample(m2, size1).astype(float)
        else:
            # just normalize by total contacts
            m1 = m1.astype(float) / n1
            m2 = m2.astype(float) / n2
        if h > 0:
            # apply smoothing
            m1 = meanFilterSparse(m1, h)
            m2 = meanFilterSparse(m2, h)
        scc[iChr] = sccByDiag(m1, m2, nDiags)
    return scc