Exemplo n.º 1
0
 def run(self):
     """Run the analysis end to end.

     Creates a ``Fasta`` from ``self.plasmid_data``, passes it (and the
     k-mers it reports via ``all_kmers_in_file()``) to a ``Fastq`` built
     over ``self.input_fastq``, and finally calls
     ``read_filter_and_map()`` on that ``Fastq``. Returns nothing.
     """
     self.logger.info("Starting analysis")

     reference = Fasta(
         self.logger,
         self.plasmid_data,
         self.kmer,
         self.homopolyer_compression,
         max_kmer_count=self.max_kmer_count,
     )

     # Positional arguments stay positional: the parameter names of
     # ``Fastq.__init__`` are not visible from here.
     reads = Fastq(
         self.logger,
         self.input_fastq,
         self.kmer,
         reference.all_kmers_in_file(),
         self.min_fasta_hits,
         self.print_interval,
         self.output_file,
         self.filtered_reads_file,
         reference,
         self.homopolyer_compression,
         max_gap=self.max_gap,
         min_block_size=self.min_block_size,
         margin=self.margin,
         start_time=self.start_time,
         min_kmers_for_onex_pass=self.min_kmers_for_onex_pass,
         min_perc_coverage=self.min_perc_coverage,
         max_kmer_count=self.max_kmer_count,
         no_gene_filter=self.no_gene_filter,
     )
     reads.read_filter_and_map()
Exemplo n.º 2
0
	def test_gzipped_input(self):
		"""read_filter_and_map() returns a truthy value for a .fastq.gz input."""
		log = logging.getLogger(__name__)
		log.setLevel(logging.ERROR)

		kmer_size = 4
		plasmid_path = os.path.join(data_dir, 'plasmid_data.fa')
		plasmids = Fasta(log, plasmid_path, kmer_size, True)

		reads_path = os.path.join(data_dir, 'query_gz.fastq.gz')
		reads = Fastq(
			log,
			reads_path,
			kmer_size,
			plasmids.all_kmers_in_file(),
			1,
			50,
			None,
			None,
			plasmids,
			True,
		)

		outcome = reads.read_filter_and_map()
		self.assertTrue(outcome)
Exemplo n.º 3
0
    def test_with_matching_read(self):
        """does_read_contain_quick_pass_kmers() is truthy for the read below."""
        log = logging.getLogger(__name__)
        log.setLevel(logging.ERROR)

        kmer_size = 11
        plasmids = Fasta(
            log, os.path.join(data_dir, 'plasmid_data.fa'), kmer_size, True)
        reads = Fastq(
            log,
            os.path.join(data_dir, 'query.fastq'),
            kmer_size,
            plasmids.all_kmers_in_file(),
            1,
            50,
            None,
            None,
            plasmids,
            True,
        )

        # Read sequence expected to pass the quick k-mer check.
        read_sequence = (
            "ATCAATACCTTCTTTATTGATTTTGATATTCACACGGCAAAAGAAACTATTTCAGCAAGCGATA"
            "ATTTTAACAACCGCTATTGATTTAGGTTTTATGCCTACTATGATTATCAAATCTGATAAAGGTT"
            "ATCAAGCATATTTTGTTTTAGAAACGCCAGTCTATGTGACTTCAAAATCAGAATTTAAATCTGT"
            "CAAAGCAGCCAAAATAATTTCGCAAAATATCCGAGAATATTTTGGAAAGTCTTTGCCAGTTGAT"
            "CTAACGTGTAATCATTTTGGTATTGCTCGCATACCAAGAACGGACAATGTAGAATTTTTTGATC"
            "CTAATTACCGTTATTCTTTCAAAGAATGGCAAGATTGGTCTTTCAAACAAACAGATAATAAGGG"
            "CTTTACTCGTTCAAGTCTAACGGTTTTAAGCGGTACAGAAGGCAAAAAACAAGTAGATGAACCC"
            "TGGTTTAATCTCTTATTGCACGAAACGAAATTTTCAGGAGAAAAGGGTTTAATAGGGCGTAATA"
            "ACGTCATGTTTACCCTCTCTTTAGCCTACTTTAGTTCAGGCTATTCAATCGAAACGTGCGAATA"
            "TAATATGTTTGAGTTTAATAATCGATTAGATCAACCCTTAGAAGAAAAAGAAGTAATCAAAATT"
            "GTTAGAAGTGCCTATTCAGAAAACTATCAAGGGGCTAATAGGGAATACATTACCATTCTTTGCA"
            "AAGCTTGGGTATCAAGTGATTTAACCAGTAAAGATTTATTTGTCCGTCAAGGGTGGTTTAAATT"
            "CAAGAAAAAAAGAAGCGAACGTCAACGTGTTCATTTGTCAGAATGGAAAGAAGATTTAATGGCT"
            "TATATTAGCGAAAAAAGCGATGTATACAAGCCTTATTTAGTGACGACCAAAAAAGAGATTAGAG"
            "AAGTG"
        )

        self.assertTrue(reads.does_read_contain_quick_pass_kmers(read_sequence))
Exemplo n.º 4
0
	def test_with_nonmatching_read(self):
		"""does_read_contain_quick_pass_kmers() is falsy for a poly-A read."""
		log = logging.getLogger(__name__)
		log.setLevel(logging.ERROR)

		kmer_size = 4
		plasmids = Fasta(log, os.path.join(data_dir, 'plasmid_data.fa'), kmer_size, True)
		reads = Fastq(
			log,
			os.path.join(data_dir, 'query.fastq'),
			kmer_size,
			plasmids.all_kmers_in_file(),
			1,
			50,
			None,
			None,
			plasmids,
			True,
		)

		poly_a_read = "AAAAAAAAAAAAAAAA"
		self.assertFalse(reads.does_read_contain_quick_pass_kmers(poly_a_read))
Exemplo n.º 5
0
	def test_filtering_alleles_partial_equal_values(self):
		"""filter_contained_alleles() leaves only 'rep7.1' and 'repUS14.1'.

		Three of the four input genes share the constructor values (9, 1);
		the fourth is built with (10, 0).
		"""
		log = logging.getLogger(__name__)
		log.setLevel(logging.ERROR)
		reads = Fastq(
			log,
			os.path.join(data_dir, 'query.fastq'),
			11,
			None,
			1,
			50,
			None,
			None,
			None,
			True,
		)

		candidates = [
			Gene('rep7.1_repC(Cassette)_AB037671', 9, 1),
			Gene('rep7.5_CDS1(pKC5b)_AF378372', 9, 1),
			Gene('rep7.6_ORF(pKH1)_SAU38656', 9, 1),
			Gene('repUS14.1_repA(VRSAp)_AP003367', 10, 0),
		]
		wanted_names = ['rep7.1', 'repUS14.1']

		survivors = reads.filter_contained_alleles(candidates)
		# Sort so the comparison does not depend on the order of the result.
		observed_names = sorted(gene.short_name() for gene in survivors)
		self.assertEqual(wanted_names, observed_names)
Exemplo n.º 6
0
	def test_writting_to_output_file(self):
		"""read_filter_and_map() writes an output file matching the fixture.

		The file is written to 'outputfile' in the current working directory
		and is always removed afterwards, even when an assertion fails or the
		run raises, so a failed test cannot leave a stale file behind.
		"""
		logger = logging.getLogger(__name__)
		logger.setLevel(logging.ERROR)
		fasta = Fasta(logger, os.path.join(data_dir, 'plasmid_data.fa'), 4, True)

		output_path = 'outputfile'
		expected_path = os.path.join(data_dir, 'expected_outputfile')
		try:
			# Construction is inside the try block in case Fastq touches the
			# output path before read_filter_and_map() is called.
			fastq = Fastq(logger, os.path.join(data_dir, 'query_gz.fastq.gz'), 4, fasta.all_kmers_in_file(), 1, 50, output_path, None, fasta, True)
			fastq.read_filter_and_map()
			self.assertTrue(os.path.exists(output_path))
			# shallow=False forces a byte-for-byte comparison; the default
			# shallow mode may report equality from os.stat() signatures alone.
			self.assertTrue(filecmp.cmp(expected_path, output_path, shallow=False))
		finally:
			# Guarded so a missing file does not mask the original failure.
			if os.path.exists(output_path):
				os.remove(output_path)