def read_bam(f, output_df=False, mapq=0, required_flag=0, filter_flag=1540):
    """Read a bam file through ``bamread``.

    Parameters
    ----------
    f : str
        Path to bam file.
    output_df : bool, default False
        If True, return the object produced by ``bamread.read_bam`` as is
        instead of wrapping it in a PyRanges.
    mapq : int, default 0
        Minimum mapping quality score.
    required_flag : int, default 0
        Flags which must be present for the interval to be read.
    filter_flag : int, default 1540
        Ignore reads with these flags.

    Returns
    -------
    The result of ``bamread.read_bam`` when ``output_df`` is True, otherwise
    that result wrapped in ``PyRanges``.
    """
    df = bamread.read_bam(f, mapq, required_flag, filter_flag)

    # The original had a second, unreachable ``return`` after this exhaustive
    # branch; it has been dropped.
    return df if output_df else PyRanges(df)
def read_bam(f, sparse=True, output_df=False, mapq=0, required_flag=0, filter_flag=1540):
    """Read a bam file through ``bamread``.

    Parameters
    ----------
    f : str
        Path to bam file.
    sparse : bool, default True
        If True, read with ``bamread.read_bam``. If False, read with
        ``bamread.read_bam_full``, which requires bamread 0.0.6 or higher.
    output_df : bool, default False
        If True, return the object produced by bamread as is instead of
        wrapping it in a PyRanges.
    mapq : int, default 0
        Minimum mapping quality score.
    required_flag : int, default 0
        Flags which must be present for the interval to be read.
    filter_flag : int, default 1540
        Ignore reads with these flags.

    Returns
    -------
    The result of the bamread reader when ``output_df`` is True, otherwise
    that result wrapped in ``PyRanges``.

    Raises
    ------
    SystemExit
        With status 1, after printing a message, when ``bamread`` is not
        installed or when ``sparse=False`` is requested but the installed
        bamread has no ``read_bam_full``.
    """
    try:
        import bamread
    except ModuleNotFoundError:
        print(
            "bamread must be installed to read bam. Use `conda install -c bioconda bamread` or `pip install bamread` to install it."
        )
        sys.exit(1)

    if sparse:
        df = bamread.read_bam(f, mapq, required_flag, filter_flag)
    else:
        # Only the attribute lookup signals an outdated bamread; an
        # AttributeError raised while actually reading must not be
        # misreported as a version problem.
        read_full = getattr(bamread, "read_bam_full", None)
        if read_full is None:
            print(
                "bamread version 0.0.6 or higher is required to read bam non-sparsely."
            )
            # Previously execution fell through here with ``df`` unbound and
            # crashed with UnboundLocalError; exit like the import guard does.
            sys.exit(1)
        df = read_full(f, mapq, required_flag, filter_flag)

    return df if output_df else PyRanges(df)
def read_bam(f, output_df=False, mapq=0, required_flag=0, filter_flag=1540):
    """Read a bam file through ``bamread``.

    Parameters
    ----------
    f : str
        Path to bam file.
    output_df : bool, default False
        If True, return the object produced by ``bamread.read_bam`` as is
        instead of wrapping it in a PyRanges.
    mapq : int, default 0
        Minimum mapping quality score.
    required_flag : int, default 0
        Flags which must be present for the interval to be read.
    filter_flag : int, default 1540
        Ignore reads with these flags.

    Returns
    -------
    The result of ``bamread.read_bam`` when ``output_df`` is True, otherwise
    that result wrapped in ``PyRanges``.

    Raises
    ------
    SystemExit
        With status 1, after printing an installation hint, when ``bamread``
        is not installed.
    """
    try:
        import bamread
    except ModuleNotFoundError:
        print(
            "bamread must be installed to read bam. Use `conda install -c bioconda bamread` or `pip install bamread` to install it."
        )
        sys.exit(1)

    df = bamread.read_bam(f, mapq, required_flag, filter_flag)

    # The original had a second, unreachable ``return`` after this exhaustive
    # branch; it has been dropped.
    return df if output_df else PyRanges(df)
def read_bam(f, sparse=True, as_df=False, mapq=0, required_flag=0, filter_flag=1540):
    """Return bam file as PyRanges.

    Parameters
    ----------
    f : str

        Path to bam file

    sparse : bool, default True

        Whether to return only the Chromosome, Start, End, Strand and Flag
        columns. If False, the file is read with ``bamread.read_bam_full``
        (requires bamread 0.0.6 or higher) and the additional columns shown
        in the second example are included.

    as_df : bool, default False

        Whether to return as pandas DataFrame instead of PyRanges.

    mapq : int, default 0

        Minimum mapping quality score.

    required_flag : int, default 0

        Flags which must be present for the interval to be read.

    filter_flag : int, default 1540

        Ignore reads with these flags. Default 1540, which means that either
        the read is unmapped, the read failed vendor or platform quality
        checks, or the read is a PCR or optical duplicate.

    Raises
    ------
    SystemExit

        With status 1, after printing a message, when ``bamread`` is not
        installed or when ``sparse=False`` is requested but the installed
        bamread has no ``read_bam_full``.

    Notes
    -----

    This functionality requires the library `bamread`. It can be installed with
    `pip install bamread` or `conda install -c bioconda bamread`.

    Examples
    --------

    >>> path = pr.get_example_path("control.bam")
    >>> pr.read_bam(path)
    +--------------+-----------+-----------+--------------+------------+
    | Chromosome   | Start     | End       | Strand       | Flag       |
    | (category)   | (int32)   | (int32)   | (category)   | (uint16)   |
    |--------------+-----------+-----------+--------------+------------|
    | chr1         | 887771    | 887796    | +            | 16         |
    | chr1         | 994660    | 994685    | +            | 16         |
    | chr1         | 1770383   | 1770408   | +            | 16         |
    | chr1         | 1995141   | 1995166   | +            | 16         |
    | ...          | ...       | ...       | ...          | ...        |
    | chrY         | 57402214  | 57402239  | +            | 16         |
    | chrY         | 10643526  | 10643551  | -            | 0          |
    | chrY         | 11776321  | 11776346  | -            | 0          |
    | chrY         | 20557165  | 20557190  | -            | 0          |
    +--------------+-----------+-----------+--------------+------------+
    Stranded PyRanges object has 10,000 rows and 5 columns from 25 chromosomes.
    For printing, the PyRanges was sorted on Chromosome and Strand.

    >>> pr.read_bam(path, sparse=False, as_df=True)
         Chromosome    Start      End Strand  Flag  QueryStart  QueryEnd Name Cigar Quality
    0          chr1   887771   887796      +    16           0        25   U0   25M    None
    1          chr1   994660   994685      +    16           0        25   U0   25M    None
    2          chr1  1041102  1041127      -     0           0        25   U0   25M    None
    3          chr1  1770383  1770408      +    16           0        25   U0   25M    None
    4          chr1  1995141  1995166      +    16           0        25   U0   25M    None
    ...         ...      ...      ...    ...   ...         ...       ...  ...   ...     ...
    9995       chrM     3654     3679      -     0           0        25   U0   25M    None
    9996       chrM     3900     3925      +    16           0        25   U0   25M    None
    9997       chrM    13006    13031      +    16           0        25   U0   25M    None
    9998       chrM    14257    14282      -     0           0        25   U0   25M    None
    9999       chrM    14257    14282      -     0           0        25   U0   25M    None
    <BLANKLINE>
    [10000 rows x 10 columns]
    """
    try:
        import bamread
    except ModuleNotFoundError:
        print(
            "bamread must be installed to read bam. Use `conda install -c bioconda bamread` or `pip install bamread` to install it."
        )
        sys.exit(1)

    if sparse:
        df = bamread.read_bam(f, mapq, required_flag, filter_flag)
    else:
        # Only the attribute lookup signals an outdated bamread; an
        # AttributeError raised while actually reading must not be
        # misreported as a version problem.
        read_full = getattr(bamread, "read_bam_full", None)
        if read_full is None:
            print(
                "bamread version 0.0.6 or higher is required to read bam non-sparsely."
            )
            # Previously execution fell through here with ``df`` unbound and
            # crashed with UnboundLocalError; exit like the import guard does.
            sys.exit(1)
        df = read_full(f, mapq, required_flag, filter_flag)

    return df if as_df else PyRanges(df)