def run(self):
    """Run the analysis: index the plasmid FASTA, then filter and map the reads.

    Logs a start message, builds a ``Fasta`` from ``self.plasmid_data``,
    hands its k-mers to a ``Fastq`` built from ``self.input_fastq``, and
    calls ``read_filter_and_map()`` on it. The result of that call is
    discarded; this method returns ``None``.
    """
    self.logger.info("Starting analysis")

    reference = Fasta(
        self.logger,
        self.plasmid_data,
        self.kmer,
        self.homopolyer_compression,
        max_kmer_count=self.max_kmer_count,
    )
    reference_kmers = reference.all_kmers_in_file()

    # Keyword-only tuning options forwarded unchanged to Fastq.
    fastq_options = {
        "max_gap": self.max_gap,
        "min_block_size": self.min_block_size,
        "margin": self.margin,
        "start_time": self.start_time,
        "min_kmers_for_onex_pass": self.min_kmers_for_onex_pass,
        "min_perc_coverage": self.min_perc_coverage,
        "max_kmer_count": self.max_kmer_count,
        "no_gene_filter": self.no_gene_filter,
    }

    reads = Fastq(
        self.logger,
        self.input_fastq,
        self.kmer,
        reference_kmers,
        self.min_fasta_hits,
        self.print_interval,
        self.output_file,
        self.filtered_reads_file,
        reference,
        self.homopolyer_compression,
        **fastq_options
    )
    reads.read_filter_and_map()
def test_with_matching_read(self):
    """A long read is expected to contain quick-pass k-mers at k=11."""
    quiet_logger = logging.getLogger(__name__)
    quiet_logger.setLevel(logging.ERROR)

    kmer_size = 11
    plasmid_path = os.path.join(data_dir, 'plasmid_data.fa')
    reads_path = os.path.join(data_dir, 'query.fastq')

    reference = Fasta(quiet_logger, plasmid_path, kmer_size, True)
    reads = Fastq(
        quiet_logger,
        reads_path,
        kmer_size,
        reference.all_kmers_in_file(),
        1,
        50,
        None,
        None,
        reference,
        True,
    )

    # Sequence under test; kept as one literal so it stays byte-identical.
    matching_read = "ATCAATACCTTCTTTATTGATTTTGATATTCACACGGCAAAAGAAACTATTTCAGCAAGCGATATTTTAACAACCGCTATTGATTTAGGTTTTATGCCTACTATGATTATCAAATCTGATAAAGGTTATCAAGCATATTTTGTTTTAGAAACGCCAGTCTATGTGACTTCAAAATCAGAATTTAAATCTGTCAAAGCAGCCAAAATAATTTCGCAAAATATCCGAGAATATTTTGGAAAGTCTTTGCCAGTTGATCTAACGTGTAATCATTTTGGTATTGCTCGCATACCAAGAACGGACAATGTAGAATTTTTTGATCCTAATTACCGTTATTCTTTCAAAGAATGGCAAGATTGGTCTTTCAAACAAACAGATAATAAGGGCTTTACTCGTTCAAGTCTAACGGTTTTAAGCGGTACAGAAGGCAAAAAACAAGTAGATGAACCCTGGTTTAATCTCTTATTGCACGAAACGAAATTTTCAGGAGAAAAGGGTTTAATAGGGCGTAATAACGTCATGTTTACCCTCTCTTTAGCCTACTTTAGTTCAGGCTATTCAATCGAAACGTGCGAATATAATATGTTTGAGTTTAATAATCGATTAGATCAACCCTTAGAAGAAAAAGAAGTAATCAAAATTGTTAGAAGTGCCTATTCAGAAAACTATCAAGGGGCTAATAGGGAATACATTACCATTCTTTGCAAAGCTTGGGTATCAAGTGATTTAACCAGTAAAGATTTATTTGTCCGTCAAGGGTGGTTTAAATTCAAGAAAAAAAGAAGCGAACGTCAACGTGTTCATTTGTCAGAATGGAAAGAAGATTTAATGGCTTATATTAGCGAAAAAAGCGATGTATACAAGCCTTATTTAGTGACGACCAAAAAAGAGATTAGAGAAGTG"

    self.assertTrue(reads.does_read_contain_quick_pass_kmers(matching_read))
def test_with_nonmatching_read(self):
    """A poly-A read is expected to contain no quick-pass k-mers at k=4."""
    quiet_logger = logging.getLogger(__name__)
    quiet_logger.setLevel(logging.ERROR)

    kmer_size = 4
    plasmid_path = os.path.join(data_dir, 'plasmid_data.fa')
    reads_path = os.path.join(data_dir, 'query.fastq')

    reference = Fasta(quiet_logger, plasmid_path, kmer_size, True)
    reads = Fastq(
        quiet_logger,
        reads_path,
        kmer_size,
        reference.all_kmers_in_file(),
        1,
        50,
        None,
        None,
        reference,
        True,
    )

    has_quick_pass_kmers = reads.does_read_contain_quick_pass_kmers("AAAAAAAAAAAAAAAA")
    self.assertFalse(has_quick_pass_kmers)
def test_four_kmers(self):
    """``read_filter_and_map()`` is expected to return a truthy value at k=4."""
    quiet_logger = logging.getLogger(__name__)
    quiet_logger.setLevel(logging.ERROR)

    kmer_size = 4
    plasmid_path = os.path.join(data_dir, 'plasmid_data.fa')
    reads_path = os.path.join(data_dir, 'query.fastq')

    reference = Fasta(quiet_logger, plasmid_path, kmer_size, True)
    reads = Fastq(
        quiet_logger,
        reads_path,
        kmer_size,
        reference.all_kmers_in_file(),
        1,
        50,
        None,
        None,
        reference,
        True,
    )

    pipeline_result = reads.read_filter_and_map()
    self.assertTrue(pipeline_result)
def test_filtering_alleles_all_complete(self):
    """``filter_contained_alleles`` is expected to keep all four input alleles.

    Four ``Gene`` objects are passed in, and the short names of the
    returned alleles must equal the expected list, in the same order.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.ERROR)

    # No k-mer set and no Fasta object are supplied (both passed as None).
    fastq = Fastq(
        logger,
        os.path.join(data_dir, 'query.fastq'),
        11,
        None,
        1,
        50,
        None,
        None,
        None,
        True,
    )

    # NOTE(review): the meaning of the trailing (10, 0) arguments is defined
    # by Gene, which is not visible here -- confirm against its constructor.
    input_alleles = [
        Gene('rep7.1_repC(Cassette)_AB037671', 10, 0),
        Gene('rep7.5_CDS1(pKC5b)_AF378372', 10, 0),
        Gene('rep7.6_ORF(pKH1)_SAU38656', 10, 0),
        Gene('repUS14.1_repA(VRSAp)_AP003367', 10, 0),
    ]
    expected_allele_names = ['rep7.1', 'rep7.5', 'rep7.6', 'repUS14.1']

    filtered_alleles = fastq.filter_contained_alleles(input_alleles)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        expected_allele_names,
        [allele.short_name() for allele in filtered_alleles],
    )