Пример #1
0
def read_bam(f, output_df=False, mapq=0, required_flag=0, filter_flag=1540):
    """Read a bam file with `bamread` and return it as PyRanges or DataFrame.

    Parameters
    ----------
    f : str

        Path to bam file; forwarded unchanged to `bamread.read_bam`.

    output_df : bool, default False

        Return the object produced by `bamread.read_bam` as-is instead of
        wrapping it in a PyRanges.

    mapq : int, default 0

        Minimum mapping quality score.

    required_flag : int, default 0

        Flags which must be present for the interval to be read.

    filter_flag : int, default 1540

        Ignore reads with these flags.

    Returns
    -------
    The result of `bamread.read_bam` if `output_df` is true, otherwise that
    result wrapped in a PyRanges.
    """

    df = bamread.read_bam(f, mapq, required_flag, filter_flag)

    if output_df:
        return df

    # The original had a second, unreachable `return bamread.read_bam(...)`
    # after an exhaustive if/else; it could never run and has been dropped.
    return PyRanges(df)
Пример #2
0
def read_bam(f,
             sparse=True,
             output_df=False,
             mapq=0,
             required_flag=0,
             filter_flag=1540):
    """Read a bam file with `bamread` and return it as PyRanges or DataFrame.

    Parameters
    ----------
    f : str

        Path to bam file; forwarded unchanged to the `bamread` reader.

    sparse : bool, default True

        If true, read with `bamread.read_bam`; otherwise read with
        `bamread.read_bam_full`, which requires bamread 0.0.6 or higher.

    output_df : bool, default False

        Return the object produced by the reader as-is instead of wrapping
        it in a PyRanges.

    mapq : int, default 0

        Minimum mapping quality score.

    required_flag : int, default 0

        Flags which must be present for the interval to be read.

    filter_flag : int, default 1540

        Ignore reads with these flags.

    Returns
    -------
    The reader's result if `output_df` is true, otherwise that result
    wrapped in a PyRanges.

    Raises
    ------
    SystemExit
        With exit status 1 (after printing a message) when `bamread` is not
        installed, or when `sparse` is false and the installed `bamread` has
        no `read_bam_full`.
    """

    try:
        import bamread
    except ModuleNotFoundError:
        print(
            "bamread must be installed to read bam. Use `conda install -c bioconda bamread` or `pip install bamread` to install it."
        )
        sys.exit(1)

    if sparse:
        reader = bamread.read_bam
    else:
        # Look the reader up explicitly rather than wrapping the call in
        # `except AttributeError`: that would also swallow AttributeErrors
        # raised while actually reading the file and report them as a
        # version problem.
        reader = getattr(bamread, "read_bam_full", None)
        if reader is None:
            print(
                "bamread version 0.0.6 or higher is required to read bam non-sparsely."
            )
            # Stop here; previously execution fell through and crashed with
            # an UnboundLocalError because `df` was never assigned.
            sys.exit(1)

    df = reader(f, mapq, required_flag, filter_flag)

    if output_df:
        return df

    return PyRanges(df)
Пример #3
0
def read_bam(f, output_df=False, mapq=0, required_flag=0, filter_flag=1540):
    """Read a bam file with `bamread` and return it as PyRanges or DataFrame.

    Parameters
    ----------
    f : str

        Path to bam file; forwarded unchanged to `bamread.read_bam`.

    output_df : bool, default False

        Return the object produced by `bamread.read_bam` as-is instead of
        wrapping it in a PyRanges.

    mapq : int, default 0

        Minimum mapping quality score.

    required_flag : int, default 0

        Flags which must be present for the interval to be read.

    filter_flag : int, default 1540

        Ignore reads with these flags.

    Returns
    -------
    The result of `bamread.read_bam` if `output_df` is true, otherwise that
    result wrapped in a PyRanges.

    Raises
    ------
    SystemExit
        With exit status 1 (after printing install instructions) when
        `bamread` is not installed.
    """

    try:
        import bamread
    except ModuleNotFoundError:
        print(
            "bamread must be installed to read bam. Use `conda install -c bioconda bamread` or `pip install bamread` to install it."
        )
        sys.exit(1)

    df = bamread.read_bam(f, mapq, required_flag, filter_flag)

    if output_df:
        return df

    # The original had a second, unreachable `return bamread.read_bam(...)`
    # after an exhaustive if/else; it could never run and has been dropped.
    return PyRanges(df)
Пример #4
0
def read_bam(f,
             sparse=True,
             as_df=False,
             mapq=0,
             required_flag=0,
             filter_flag=1540):
    """Return bam file as PyRanges.

    Parameters
    ----------
    f : str

        Path to bam file

    sparse : bool, default True

        Whether to return only the Chromosome, Start, End, Strand and Flag
        columns. With ``sparse=False`` the QueryStart, QueryEnd, Name, Cigar
        and Quality columns are included as well (see Examples). Reading
        non-sparsely requires bamread version 0.0.6 or higher.

    as_df : bool, default False

        Whether to return as pandas DataFrame instead of PyRanges.

    mapq : int, default 0

        Minimum mapping quality score.

    required_flag : int, default 0

        Flags which must be present for the interval to be read.

    filter_flag : int, default 1540

        Ignore reads with these flags. Default 1540, which means that either
        the read is unmapped, the read failed vendor or platform quality
        checks, or the read is a PCR or optical duplicate.

    Raises
    ------
    SystemExit

        With exit status 1 (after printing a message) when `bamread` is not
        installed, or when `sparse` is false and the installed `bamread` has
        no `read_bam_full`.

    Notes
    -----

    This functionality requires the library `bamread`. It can be installed with
    `pip install bamread` or `conda install -c bioconda bamread`.

    Examples
    --------

    >>> path = pr.get_example_path("control.bam")
    >>> pr.read_bam(path)
    +--------------+-----------+-----------+--------------+------------+
    | Chromosome   | Start     | End       | Strand       | Flag       |
    | (category)   | (int32)   | (int32)   | (category)   | (uint16)   |
    |--------------+-----------+-----------+--------------+------------|
    | chr1         | 887771    | 887796    | +            | 16         |
    | chr1         | 994660    | 994685    | +            | 16         |
    | chr1         | 1770383   | 1770408   | +            | 16         |
    | chr1         | 1995141   | 1995166   | +            | 16         |
    | ...          | ...       | ...       | ...          | ...        |
    | chrY         | 57402214  | 57402239  | +            | 16         |
    | chrY         | 10643526  | 10643551  | -            | 0          |
    | chrY         | 11776321  | 11776346  | -            | 0          |
    | chrY         | 20557165  | 20557190  | -            | 0          |
    +--------------+-----------+-----------+--------------+------------+
    Stranded PyRanges object has 10,000 rows and 5 columns from 25 chromosomes.
    For printing, the PyRanges was sorted on Chromosome and Strand.

    >>> pr.read_bam(path, sparse=False, as_df=True)
         Chromosome    Start      End Strand  Flag  QueryStart  QueryEnd Name Cigar Quality
    0          chr1   887771   887796      +    16           0        25   U0   25M    None
    1          chr1   994660   994685      +    16           0        25   U0   25M    None
    2          chr1  1041102  1041127      -     0           0        25   U0   25M    None
    3          chr1  1770383  1770408      +    16           0        25   U0   25M    None
    4          chr1  1995141  1995166      +    16           0        25   U0   25M    None
    ...         ...      ...      ...    ...   ...         ...       ...  ...   ...     ...
    9995       chrM     3654     3679      -     0           0        25   U0   25M    None
    9996       chrM     3900     3925      +    16           0        25   U0   25M    None
    9997       chrM    13006    13031      +    16           0        25   U0   25M    None
    9998       chrM    14257    14282      -     0           0        25   U0   25M    None
    9999       chrM    14257    14282      -     0           0        25   U0   25M    None
    <BLANKLINE>
    [10000 rows x 10 columns]
    """

    try:
        import bamread
    except ModuleNotFoundError:
        print(
            "bamread must be installed to read bam. Use `conda install -c bioconda bamread` or `pip install bamread` to install it."
        )
        sys.exit(1)

    if sparse:
        reader = bamread.read_bam
    else:
        # Look the reader up explicitly rather than wrapping the call in
        # `except AttributeError`: that would also swallow AttributeErrors
        # raised while actually reading the file and report them as a
        # version problem.
        reader = getattr(bamread, "read_bam_full", None)
        if reader is None:
            print(
                "bamread version 0.0.6 or higher is required to read bam non-sparsely."
            )
            # Stop here; previously execution fell through and crashed with
            # an UnboundLocalError because `df` was never assigned.
            sys.exit(1)

    df = reader(f, mapq, required_flag, filter_flag)

    if as_df:
        return df

    return PyRanges(df)