Python RefFastaReader示例

编程语言: Python

命名空间/包名称: nucleus.io.fasta

方法/功能: RefFastaReader

hotexamples.com的示例: 6

Python RefFastaReader - 已找到 6 个示例。这些是从开源项目中提取的、评价最高的 nucleus.io.fasta.RefFastaReader 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： fasta_test.py 项目： zpeng1989/nucleus

  def setUpClass(cls):
    """Create the readers shared by the tests of this class.

    Opens the 'test.fasta' test data file with a RefFastaReader, then builds
    an InMemoryRefReader from the bases of every contig listed in that
    reader's header.
    """
    fasta_path = test_utils.genomics_core_testdata('test.fasta')
    cls.fasta_reader = fasta.RefFastaReader(fasta_path)

    # One (contig name, 0, bases) tuple per contig, where the bases come from
    # querying the range 0..n_bases of that contig.
    chromosomes = []
    for contig in cls.fasta_reader.header.contigs:
      whole_contig = ranges.make_range(contig.name, 0, contig.n_bases)
      contig_bases = cls.fasta_reader.query(whole_contig)
      chromosomes.append((contig.name, 0, contig_bases))

    cls.in_mem = fasta.InMemoryRefReader(chromosomes)

示例#2

显示文件

def main(argv):
  """Validate a VCF file against a reference FASTA file.

  Args:
    argv: Command-line arguments. Exactly three entries are expected: the
      program name, the reference path and the VCF path.

  Prints a usage message and exits with status -1 when the number of
  arguments is not three. Otherwise validates the contigs and every variant,
  prints a success message and exits with status 0.
  """
  expected_arg_count = 3
  if len(argv) != expected_arg_count:
    usage = 'Usage: {} <input_ref> <input_vcf>'.format(argv[0])
    print(usage)
    sys.exit(-1)

  in_ref, in_vcf = argv[1], argv[2]

  # Both readers are opened in one `with` statement; the VCF reader is
  # created with use_index=False.
  with fasta.RefFastaReader(in_ref) as ref_reader, \
      vcf.VcfReader(in_vcf, use_index=False) as vcf_reader:
    ref_contigs = ref_reader.header.contigs
    vcf_contigs = vcf_reader.header.contigs
    validate_contigs(ref_contigs, vcf_contigs)
    for variant in vcf_reader:
      validate_variant(ref_reader, variant)

  # Reaching this point means no validation step stopped the program.
  print('Reference and VCF are compatible.')
  sys.exit(0)

示例#3

显示文件

 def test_make_ref_reader_cache_specified(self, fasta_filename):
     """Query a reader created with an explicit cache_size of 10.

     Expects the range ('chrM', 1, 5) to yield 'ATCA'.
     """
     fasta_path = test_utils.genomics_core_testdata(fasta_filename)
     with fasta.RefFastaReader(fasta_path, cache_size=10) as reader:
         region = ranges.make_range('chrM', 1, 5)
         bases = reader.query(region)
         self.assertEqual(bases, 'ATCA')

示例#4

显示文件

 def test_make_ref_reader_default(self, fasta_filename):
     """Query a reader created with only the fasta path argument.

     Expects the range ('chrM', 1, 6) to yield 'ATCAC'.
     """
     fasta_path = test_utils.genomics_core_testdata(fasta_filename)
     with fasta.RefFastaReader(fasta_path) as reader:
         region = ranges.make_range('chrM', 1, 6)
         bases = reader.query(region)
         self.assertEqual(bases, 'ATCAC')

示例#5

显示文件

def make_ngs_error_examples(ref_path,
                            vcf_path,
                            bam_path,
                            examples_out_path,
                            max_reads=None):
    """Driver program for ngs_errors.

    Iterates over the reads of bam_path. For every read accepted by
    is_usable_training_example, the result of make_example is written to
    examples_out_path. See module description for details.

    Args:
        ref_path: str. A path to an indexed fasta file.
        vcf_path: str. A path to an indexed VCF file.
        bam_path: str. A path to an SAM/BAM file.
        examples_out_path: str. A path where we will write out examples.
        max_reads: int or None. If not None, stop as soon as the number of
            written examples reaches max_reads.
    """
    # Readers for the reference and the variants.
    ref_reader = fasta.RefFastaReader(ref_path)
    vcf_reader = vcf.VcfReader(vcf_path)

    # The SAM reader receives an empty ReadRequirements proto so that it
    # applies standard filtering based on the proto's default values.
    # use_index=False explicitly allows an unindexed BAM, which means only
    # iterate() is enabled.
    sam_reader = sam.SamReader(bam_path,
                               use_index=False,
                               read_requirements=reads_pb2.ReadRequirements())

    # Destination for the generated tf.Examples.
    examples_out = genomics_writer.TFRecordWriter(examples_out_path)

    n_examples = 0
    # Every reader/writer is a context manager, so a single `with` opens them
    # all and closes them once the loop over the reads is done.
    with ref_reader, vcf_reader, sam_reader, examples_out:
        for n_reads_seen, read in enumerate(sam_reader.iterate(), start=1):
            # Range proto describing the chrom/start/stop spanned by the read.
            read_range = utils.read_range(read)

            # Variants overlapping the read, then the reference bases under it.
            variants = list(vcf_reader.query(read_range))
            ref_bases = ref_reader.query(read_range)

            # Reads that cannot be used for an example are skipped.
            if not is_usable_training_example(read, variants, ref_bases):
                continue

            n_examples += 1

            # Convert the read and its reference bases into a tf.Example and
            # append it to the TFRecord output.
            examples_out.write(make_example(read, ref_bases))

            # Convenient but very verbose when many reads are converted.
            logging.info(
                'Added an example for read %s (span=%s) with cigar %s [%d added '
                'of %d total reads]',
                read.fragment_name,
                ranges.to_literal(read_range),
                cigar.format_cigar_units(read.alignment.cigar),
                n_examples,
                n_reads_seen)

            # Stop once the requested number of examples has been written.
            if max_reads is not None and n_examples >= max_reads:
                return

示例#6

显示文件

文件： fasta_test.py 项目： zpeng1989/nucleus

 def test_c_reader(self):
   """The c_reader attribute of a reader is a GenomeReferenceFai instance."""
   fasta_path = test_utils.genomics_core_testdata('test.fasta')
   with fasta.RefFastaReader(fasta_path) as reader:
     underlying = reader.c_reader
     self.assertIsInstance(underlying, reference_fai.GenomeReferenceFai)