Exemplo n.º 1
0
 def setUp(self):
     """Creates the fixtures shared by the tests in this case.

     Stores on the instance:
       bam: the value returned by test_utils.genomics_core_testdata for
         'test.bam' (presumably the path to that test file -- confirm against
         test_utils).
       options: a default-constructed SamReaderOptions proto.
       indexed_options: a SamReaderOptions proto whose index_mode is
         INDEX_BASED_ON_FILENAME.
     """
     bam_testdata = test_utils.genomics_core_testdata('test.bam')
     default_options = reads_pb2.SamReaderOptions()
     filename_indexed_options = reads_pb2.SamReaderOptions(
         index_mode=index_pb2.INDEX_BASED_ON_FILENAME)

     self.bam = bam_testdata
     self.options = default_options
     self.indexed_options = filename_indexed_options
Exemplo n.º 2
0
 def setUp(self):
     """Creates the fixtures shared by the tests in this case.

     Stores on the instance:
       bam: the value returned by test_utils.genomics_core_testdata for
         'test.bam' (presumably the path to that test file -- confirm against
         test_utils).
       options: a default-constructed SamReaderOptions proto.
     """
     bam_testdata = test_utils.genomics_core_testdata('test.bam')
     default_options = reads_pb2.SamReaderOptions()

     self.bam = bam_testdata
     self.options = default_options
Exemplo n.º 3
0
    def __init__(self,
                 input_path,
                 read_requirements=None,
                 parse_aux_fields=False,
                 hts_block_size=None,
                 downsample_fraction=None,
                 random_seed=None):
        """Initializes a NativeSamReader.

        Paths ending in '.tfbam' are delegated to the optional tfbam_lib
        package; every other path is opened with sam_reader.SamReader.from_file.

        Args:
          input_path: str. A path to a resource containing SAM/BAM records.
            Currently supports SAM text format and BAM binary format.
          read_requirements: optional ReadRequirement proto. If not None, this
            proto is used to control which reads are filtered out by the reader
            before they are passed to the client.
          parse_aux_fields: optional bool, defaulting to False. If False, we do
            not parse the auxiliary fields of the SAM/BAM records (see SAM spec
            for details). Parsing the aux fields is unnecessary for many
            applications, and adds a significant parsing cost to access. If you
            need these aux fields, set parse_aux_fields to True and these fields
            will be parsed and populate the appropriate Read proto fields (e.g.,
            read.info). This argument is not forwarded to the tfbam reader.
          hts_block_size: int or None. If specified, this configures the block
            size of the underlying htslib file object. Larger values (e.g. 1M)
            may be beneficial for reading remote files. If None, the reader uses
            the default htslib block size (0 is passed in the options proto).
          downsample_fraction: float in the interval [0.0, 1.0] or None. If
            specified as a positive float, the reader will only keep each read
            with probability downsample_fraction, randomly. If None or zero, all
            reads are kept.
          random_seed: None or int. The random seed to use with this sam reader,
            if needed. If None, a fixed random value is assigned for non-.tfbam
            inputs; for .tfbam inputs the value is forwarded unchanged.

        Raises:
          ValueError: If downsample_fraction is neither None nor zero and is
            outside the interval (0.0, 1.0]. This check is only performed for
            non-.tfbam inputs; .tfbam arguments are forwarded as given.
          ImportError: If input_path ends with '.tfbam' and tfbam_lib cannot be
            imported.
        """
        if input_path.endswith('.tfbam'):
            # Delayed loading of tfbam_lib. Only the import itself is guarded, so
            # an ImportError raised while constructing the reader is propagated
            # as-is instead of being misreported as a missing tfbam_lib module.
            try:
                from tfbam_lib import tfbam_reader  # pylint: disable=g-import-not-at-top
            except ImportError:
                raise ImportError(
                    'tfbam_lib module not found, cannot read .tfbam files.')
            self._reader = tfbam_reader.make_sam_reader(
                input_path,
                read_requirements=read_requirements,
                unused_block_size=hts_block_size,
                downsample_fraction=downsample_fraction,
                random_seed=random_seed)
        else:
            aux_field_handling = reads_pb2.SamReaderOptions.SKIP_AUX_FIELDS
            if parse_aux_fields:
                aux_field_handling = reads_pb2.SamReaderOptions.PARSE_ALL_AUX_FIELDS

            # We make 0 be a valid value that means "keep all reads" so that proto
            # defaults (=0) do not omit all reads.
            if downsample_fraction is not None and downsample_fraction != 0:
                if not 0.0 < downsample_fraction <= 1.0:
                    raise ValueError(
                        'downsample_fraction must be in the interval (0.0, 1.0]',
                        downsample_fraction)

            if random_seed is None:
                # Fixed random seed produced with 'od -vAn -N4 -tu4 < /dev/urandom'.
                random_seed = 2928130004

            self._reader = sam_reader.SamReader.from_file(
                input_path.encode('utf8'),
                reads_pb2.SamReaderOptions(
                    read_requirements=read_requirements,
                    aux_field_handling=aux_field_handling,
                    hts_block_size=(hts_block_size or 0),
                    downsample_fraction=downsample_fraction,
                    random_seed=random_seed))

            # NOTE: header is only populated on this (non-.tfbam) path.
            self.header = self._reader.header

        super(NativeSamReader, self).__init__()