def run(self):
    """Run the analysis end to end.

    Logs a start message, builds a ``Fasta`` from ``self.plasmid_data``,
    builds a ``Fastq`` over ``self.input_fastq`` seeded with the k-mers
    reported by that ``Fasta``, and then calls ``read_filter_and_map()``
    on it. The result of that call is not returned.
    """
    self.logger.info("Starting analysis")

    reference = Fasta(
        self.logger,
        self.plasmid_data,
        self.kmer,
        self.homopolyer_compression,
        max_kmer_count=self.max_kmer_count,
    )
    reference_kmers = reference.all_kmers_in_file()

    # Tunables forwarded to Fastq by keyword, taken straight from this object.
    fastq_options = {
        'max_gap': self.max_gap,
        'min_block_size': self.min_block_size,
        'margin': self.margin,
        'start_time': self.start_time,
        'min_kmers_for_onex_pass': self.min_kmers_for_onex_pass,
        'min_perc_coverage': self.min_perc_coverage,
        'max_kmer_count': self.max_kmer_count,
        'no_gene_filter': self.no_gene_filter,
    }

    reads = Fastq(
        self.logger,
        self.input_fastq,
        self.kmer,
        reference_kmers,
        self.min_fasta_hits,
        self.print_interval,
        self.output_file,
        self.filtered_reads_file,
        reference,
        self.homopolyer_compression,
        **fastq_options
    )
    reads.read_filter_and_map()
def test_gzipped_input(self):
    # read_filter_and_map() must return a truthy value when the FASTQ
    # input is the gzipped fixture 'query_gz.fastq.gz'.
    quiet_logger = logging.getLogger(__name__)
    quiet_logger.setLevel(logging.ERROR)

    kmer_size = 4
    plasmid_path = os.path.join(data_dir, 'plasmid_data.fa')
    reads_path = os.path.join(data_dir, 'query_gz.fastq.gz')

    reference = Fasta(quiet_logger, plasmid_path, kmer_size, True)
    reads = Fastq(
        quiet_logger,
        reads_path,
        kmer_size,
        reference.all_kmers_in_file(),
        1,
        50,
        None,
        None,
        reference,
        True,
    )

    outcome = reads.read_filter_and_map()
    self.assertTrue(outcome)
def test_with_matching_read(self):
    # does_read_contain_quick_pass_kmers() must be truthy for the long
    # read below, using k-mers of size 11 from 'plasmid_data.fa'.
    quiet_logger = logging.getLogger(__name__)
    quiet_logger.setLevel(logging.ERROR)

    kmer_size = 11
    reference = Fasta(
        quiet_logger, os.path.join(data_dir, 'plasmid_data.fa'), kmer_size, True)
    reads = Fastq(
        quiet_logger,
        os.path.join(data_dir, 'query.fastq'),
        kmer_size,
        reference.all_kmers_in_file(),
        1,
        50,
        None,
        None,
        reference,
        True,
    )

    # Adjacent literals are concatenated into a single read sequence.
    matching_read = (
        "ATCAATACCTTCTTTATTGATTTTGATATTCACACGGCAAAAGAAACTATTTCAGCAAGCGATA"
        "ATTTTAACAACCGCTATTGATTTAGGTTTTATGCCTACTATGATTATCAAATCTGATAAAGGTT"
        "ATCAAGCATATTTTGTTTTAGAAACGCCAGTCTATGTGACTTCAAAATCAGAATTTAAATCTGT"
        "CAAAGCAGCCAAAATAATTTCGCAAAATATCCGAGAATATTTTGGAAAGTCTTTGCCAGTTGAT"
        "CTAACGTGTAATCATTTTGGTATTGCTCGCATACCAAGAACGGACAATGTAGAATTTTTTGATC"
        "CTAATTACCGTTATTCTTTCAAAGAATGGCAAGATTGGTCTTTCAAACAAACAGATAATAAGGG"
        "CTTTACTCGTTCAAGTCTAACGGTTTTAAGCGGTACAGAAGGCAAAAAACAAGTAGATGAACCC"
        "TGGTTTAATCTCTTATTGCACGAAACGAAATTTTCAGGAGAAAAGGGTTTAATAGGGCGTAATA"
        "ACGTCATGTTTACCCTCTCTTTAGCCTACTTTAGTTCAGGCTATTCAATCGAAACGTGCGAATA"
        "TAATATGTTTGAGTTTAATAATCGATTAGATCAACCCTTAGAAGAAAAAGAAGTAATCAAAATT"
        "GTTAGAAGTGCCTATTCAGAAAACTATCAAGGGGCTAATAGGGAATACATTACCATTCTTTGCA"
        "AAGCTTGGGTATCAAGTGATTTAACCAGTAAAGATTTATTTGTCCGTCAAGGGTGGTTTAAATT"
        "CAAGAAAAAAAGAAGCGAACGTCAACGTGTTCATTTGTCAGAATGGAAAGAAGATTTAATGGCT"
        "TATATTAGCGAAAAAAGCGATGTATACAAGCCTTATTTAGTGACGACCAAAAAAGAGATTAGAG"
        "AAGTG"
    )

    self.assertTrue(reads.does_read_contain_quick_pass_kmers(matching_read))
def test_with_nonmatching_read(self):
    # does_read_contain_quick_pass_kmers() must be falsy for a read made
    # only of 'A' bases, using k-mers of size 4 from 'plasmid_data.fa'.
    quiet_logger = logging.getLogger(__name__)
    quiet_logger.setLevel(logging.ERROR)

    kmer_size = 4
    plasmid_path = os.path.join(data_dir, 'plasmid_data.fa')
    reads_path = os.path.join(data_dir, 'query.fastq')

    reference = Fasta(quiet_logger, plasmid_path, kmer_size, True)
    reads = Fastq(
        quiet_logger,
        reads_path,
        kmer_size,
        reference.all_kmers_in_file(),
        1,
        50,
        None,
        None,
        reference,
        True,
    )

    nonmatching_read = "AAAAAAAAAAAAAAAA"
    self.assertFalse(reads.does_read_contain_quick_pass_kmers(nonmatching_read))
def test_filtering_alleles_partial_equal_values(self):
    # Three rep7 alleles are built with the same (9, 1) arguments and one
    # repUS14 allele with (10, 0). After filter_contained_alleles() the
    # sorted short names must be exactly 'rep7.1' and 'repUS14.1'.
    quiet_logger = logging.getLogger(__name__)
    quiet_logger.setLevel(logging.ERROR)

    # No Fasta is involved here: the k-mer set and Fasta arguments are None.
    reads = Fastq(
        quiet_logger,
        os.path.join(data_dir, 'query.fastq'),
        11,
        None,
        1,
        50,
        None,
        None,
        None,
        True,
    )

    candidates = [
        Gene('rep7.1_repC(Cassette)_AB037671', 9, 1),
        Gene('rep7.5_CDS1(pKC5b)_AF378372', 9, 1),
        Gene('rep7.6_ORF(pKH1)_SAU38656', 9, 1),
        Gene('repUS14.1_repA(VRSAp)_AP003367', 10, 0),
    ]
    wanted_names = ['rep7.1', 'repUS14.1']

    survivors = reads.filter_contained_alleles(candidates)
    surviving_names = sorted(allele.short_name() for allele in survivors)

    self.assertEqual(wanted_names, surviving_names)
def test_writting_to_output_file(self):
    # Running read_filter_and_map() with an output path of 'outputfile'
    # must create that file with the same content as the
    # 'expected_outputfile' fixture.
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.ERROR)

    output_path = 'outputfile'
    fasta = Fasta(logger, os.path.join(data_dir, 'plasmid_data.fa'), 4, True)

    # Clean up in `finally` so a failing assertion (or an exception while
    # mapping) cannot leave a stale 'outputfile' in the working directory,
    # where it could affect later test runs.
    try:
        fastq = Fastq(
            logger,
            os.path.join(data_dir, 'query_gz.fastq.gz'),
            4,
            fasta.all_kmers_in_file(),
            1,
            50,
            output_path,
            None,
            fasta,
            True,
        )
        fastq.read_filter_and_map()

        self.assertTrue(os.path.exists(output_path))
        self.assertTrue(
            filecmp.cmp(os.path.join(data_dir, 'expected_outputfile'), output_path))
    finally:
        # Guarded: if the file was never created, removing it would raise
        # and mask the original failure.
        if os.path.exists(output_path):
            os.remove(output_path)