def test_varicella_big(self):
    # Order and orient the hhv3 contigs against the hhv3 reference, then check
    # that the produced sequence equals the expected one. Only the sequence
    # strings are compared; FASTA headers are not part of the assertion.
    input_dir = util.file.get_test_input_path(self)
    out_fasta = util.file.mkstempfname('.fasta')
    expected_fasta = os.path.join(input_dir, 'expected.hhv3.fasta')
    contigs_fasta = os.path.join(input_dir, 'contigs.hhv3.fasta')
    ref_fasta = os.path.join(input_dir, 'ref.hhv3.fasta')

    assembly.order_and_orient(contigs_fasta, ref_fasta, out_fasta)

    actual_seq = str(Bio.SeqIO.read(out_fasta, 'fasta').seq)
    expected_seq = str(Bio.SeqIO.read(expected_fasta, 'fasta').seq)
    self.assertEqual(actual_seq, expected_seq)
def test_multi_overlap(self):
    # Run order_and_orient on the ebov contigs with the small ebov reference
    # and compare the resulting sequence string with the expected one.
    input_dir = util.file.get_test_input_path(self)
    out_fasta = util.file.mkstempfname('.fasta')
    expected_fasta = os.path.join(input_dir, 'expected.ebov.small.fasta')
    contigs_fasta = os.path.join(input_dir, 'contigs.ebov.fasta')
    ref_fasta = os.path.join(input_dir, 'ref.ebov.small.fasta')

    assembly.order_and_orient(contigs_fasta, ref_fasta, out_fasta)

    actual_seq = str(Bio.SeqIO.read(out_fasta, 'fasta').seq)
    expected_seq = str(Bio.SeqIO.read(expected_fasta, 'fasta').seq)
    self.assertEqual(actual_seq, expected_seq)
def setUp(self):
    # Shared fixture: after the parent setUp, record the test input directory
    # and the partial influenza reference, then run order_and_orient once on
    # the influenza contigs so tests can use self.outOrientFasta.
    super(TestOrderOrientAndImputeFromReference, self).setUp()

    input_dir = util.file.get_test_input_path(self)
    self.inDir = input_dir
    self.refFasta = os.path.join(input_dir, 'ref.influenza_partial.fasta')
    self.outOrientFasta = util.file.mkstempfname('.fasta')

    contigs_fasta = os.path.join(input_dir, 'contigs.influenza.fasta')
    assembly.order_and_orient(contigs_fasta, self.refFasta, self.outOrientFasta)
def test_influenza_multisegment(self):
    # Order and orient the influenza contigs against the influenza reference
    # and compare the output file with the expected FASTA via
    # assertEqualContents.
    inDir = util.file.get_test_input_path(self)
    outFasta = util.file.mkstempfname('.fasta')
    try:
        expected = os.path.join(inDir, 'expected.influenza.fasta')
        assembly.order_and_orient(
            os.path.join(inDir, 'contigs.influenza.fasta'),
            os.path.join(inDir, 'ref.influenza.fasta'),
            outFasta)
        self.assertEqualContents(outFasta, expected)
    finally:
        # Clean up in a finally block so the temp file is removed even when
        # order_and_orient raises or the comparison fails. The existence check
        # keeps a missing file from masking the original error.
        if os.path.exists(outFasta):
            os.unlink(outFasta)
def test_ebov_palindrome_refsel(self):
    # this tests a scenario where show-aligns has more alignments than show-tiling
    # The refs input and n_genome_segments=1 are passed to order_and_orient;
    # both the output sequences and the stats file are checked.
    input_names = ('contigs.ebov.doublehit.fasta', 'refs.ebov.fasta',
                   'expected.ebov.doublehit.fasta', 'expected.refsel.ebov.stats.tsv')
    with util.file.tempfnames(('.out.fasta', '.stats.tsv')) as tmp_paths:
        out_fasta, out_stats = tmp_paths
        contigs, refs, expected, expected_stats = self.inputs(*input_names)
        assembly.order_and_orient(contigs, refs, out_fasta,
                                  n_genome_segments=1, outStats=out_stats)
        self.assertEqualFastaSeqs(out_fasta, expected)
        self.assertEqualContents(out_stats, expected_stats)
def test_hiv_wraparound(self):
    # this tests a misassembly from Trinity and checks that we still use some of the contig
    # Only the sequence strings of the output and expected FASTA are compared.
    input_dir = util.file.get_test_input_path(self)
    out_fasta = util.file.mkstempfname('.fasta')
    expected_fasta = os.path.join(input_dir, 'expected.hiv.wrapped.fasta')
    contigs_fasta = os.path.join(input_dir, 'contigs.hiv.wrapped.fasta')
    ref_fasta = os.path.join(input_dir, 'ref.hiv.fasta')

    assembly.order_and_orient(contigs_fasta, ref_fasta, out_fasta)

    actual_seq = str(Bio.SeqIO.read(out_fasta, 'fasta').seq)
    expected_seq = str(Bio.SeqIO.read(expected_fasta, 'fasta').seq)
    self.assertEqual(actual_seq, expected_seq)
def test_lassa_protein(self):
    # Order and orient the lasv contigs against the lasv reference with
    # aligner='promer' and compare the output file with the expected promer
    # result via assertEqualContents.
    inDir = util.file.get_test_input_path(self)
    outFasta = util.file.mkstempfname('.fasta')
    try:
        expected = os.path.join(inDir, 'expected.lasv.promer.fasta')
        assembly.order_and_orient(
            os.path.join(inDir, 'contigs.lasv.fasta'),
            os.path.join(inDir, 'ref.lasv.fasta'),
            outFasta,
            aligner='promer')
        self.assertEqualContents(outFasta, expected)
    finally:
        # Clean up in a finally block so the temp file is removed even when
        # order_and_orient raises or the comparison fails. The existence check
        # keeps a missing file from masking the original error.
        if os.path.exists(outFasta):
            os.unlink(outFasta)
def test_ebov_palindrome(self):
    # this tests a scenario where show-aligns has more alignments than show-tiling
    # Note: the reference comes from get_test_input_path() called without
    # arguments, not from this test's own input directory.
    input_dir = util.file.get_test_input_path(self)
    out_fasta = util.file.mkstempfname('.fasta')
    expected_fasta = os.path.join(input_dir, 'expected.ebov.doublehit.fasta')
    contigs_fasta = os.path.join(input_dir, 'contigs.ebov.doublehit.fasta')
    ref_fasta = os.path.join(util.file.get_test_input_path(), 'ebov-makona.fasta')

    assembly.order_and_orient(contigs_fasta, ref_fasta, out_fasta)

    actual_seq = str(Bio.SeqIO.read(out_fasta, 'fasta').seq)
    expected_seq = str(Bio.SeqIO.read(expected_fasta, 'fasta').seq)
    self.assertEqual(actual_seq, expected_seq)
def test_varicella_big(self):
    # hhv3 contigs ordered/oriented against the hhv3 reference must yield the
    # expected sequence. The comparison is on sequence strings only.
    data_dir = util.file.get_test_input_path(self)
    result_path = util.file.mkstempfname('.fasta')
    expected_path, contigs_path, ref_path = [
        os.path.join(data_dir, fname)
        for fname in ('expected.hhv3.fasta', 'contigs.hhv3.fasta', 'ref.hhv3.fasta')
    ]

    assembly.order_and_orient(contigs_path, ref_path, result_path)

    got = Bio.SeqIO.read(result_path, 'fasta')
    want = Bio.SeqIO.read(expected_path, 'fasta')
    self.assertEqual(str(got.seq), str(want.seq))
def test_lassa_protein(self):
    # Order and orient the lasv contigs against the lasv reference with
    # aligner='promer' and compare the output file with the expected promer
    # result via assertEqualContents.
    inDir = util.file.get_test_input_path(self)
    outFasta = util.file.mkstempfname('.fasta')
    try:
        expected = os.path.join(inDir, 'expected.lasv.promer.fasta')
        assembly.order_and_orient(
            os.path.join(inDir, 'contigs.lasv.fasta'),
            os.path.join(inDir, 'ref.lasv.fasta'),
            outFasta,
            aligner='promer')
        self.assertEqualContents(outFasta, expected)
    finally:
        # Clean up in a finally block so the temp file is removed even when
        # order_and_orient raises or the comparison fails. The existence check
        # keeps a missing file from masking the original error.
        if os.path.exists(outFasta):
            os.unlink(outFasta)
def test_multi_overlap(self):
    # ebov contigs ordered/oriented against the small ebov reference must
    # yield the expected sequence. The comparison is on sequence strings only.
    data_dir = util.file.get_test_input_path(self)
    result_path = util.file.mkstempfname('.fasta')
    expected_path, contigs_path, ref_path = [
        os.path.join(data_dir, fname)
        for fname in ('expected.ebov.small.fasta', 'contigs.ebov.fasta', 'ref.ebov.small.fasta')
    ]

    assembly.order_and_orient(contigs_path, ref_path, result_path)

    got = Bio.SeqIO.read(result_path, 'fasta')
    want = Bio.SeqIO.read(expected_path, 'fasta')
    self.assertEqual(str(got.seq), str(want.seq))
def test_hiv_wraparound(self):
    # this tests a misassembly from Trinity and checks that we still use some of the contig
    # The comparison is on sequence strings only.
    data_dir = util.file.get_test_input_path(self)
    result_path = util.file.mkstempfname('.fasta')
    expected_path, contigs_path, ref_path = [
        os.path.join(data_dir, fname)
        for fname in ('expected.hiv.wrapped.fasta', 'contigs.hiv.wrapped.fasta', 'ref.hiv.fasta')
    ]

    assembly.order_and_orient(contigs_path, ref_path, result_path)

    got = Bio.SeqIO.read(result_path, 'fasta')
    want = Bio.SeqIO.read(expected_path, 'fasta')
    self.assertEqual(str(got.seq), str(want.seq))
def test_ebov_palindrome(self):
    # this tests a scenario where show-aligns has more alignments than show-tiling
    # The reference path is built from get_test_input_path() called with no
    # argument, unlike the contigs and expected files.
    data_dir = util.file.get_test_input_path(self)
    result_path = util.file.mkstempfname('.fasta')
    expected_path = os.path.join(data_dir, 'expected.ebov.doublehit.fasta')
    contigs_path = os.path.join(data_dir, 'contigs.ebov.doublehit.fasta')
    shared_dir = util.file.get_test_input_path()
    ref_path = os.path.join(shared_dir, 'ebov-makona.fasta')

    assembly.order_and_orient(contigs_path, ref_path, result_path)

    got = Bio.SeqIO.read(result_path, 'fasta')
    want = Bio.SeqIO.read(expected_path, 'fasta')
    self.assertEqual(str(got.seq), str(want.seq))
def test_lassa_multisegment_refsel(self):
    # Pass several candidate lasv references to order_and_orient and check
    # three outputs: the ordered contigs, the reference written to
    # outReference (expected to equal the first candidate, josiah), and the
    # stats file.
    tmp_suffixes = ('.out.fasta', '.out_ref.fasta', '.stats.tsv')
    strains = ('josiah', 'pinneo', 'KGH_G502', 'BNI_Nig08_A19', 'nomatch')
    with util.file.tempfnames(tmp_suffixes) as tmp_paths:
        out_fasta, out_reference, out_stats = tmp_paths
        contigs, expected, expected_stats = self.inputs(
            'contigs.lasv.fasta',
            'expected.lasv.fasta',
            'expected.refsel.lasv.stats.tsv')
        refs = [self.input('ref.lasv.{}.fasta'.format(name)) for name in strains]

        assembly.order_and_orient(contigs, refs, out_fasta,
                                  outReference=out_reference, outStats=out_stats)

        self.assertEqualContents(out_fasta, expected)
        self.assertEqualFasta(out_reference, refs[0])
        self.assertEqualContents(out_stats, expected_stats)
def test_alternate_contigs(self):
    # this tests that --outAlternateContigs works as expected
    # Both the main output and the alternate-contigs output are compared with
    # their expected files via assertEqualContents.
    input_dir = util.file.get_test_input_path(self)
    out_fasta = util.file.mkstempfname('.fasta')
    alt_fasta = util.file.mkstempfname('.fasta')
    expected_main = os.path.join(input_dir, 'expected.hiv.big_indel.fasta')
    expected_alt = os.path.join(input_dir, 'expected.hiv.big_indel.alternates.fasta')
    contigs_fasta = os.path.join(input_dir, 'contigs.hiv.big_indel.fasta')
    ref_fasta = os.path.join(input_dir, 'ref.hiv.fasta')

    assembly.order_and_orient(contigs_fasta, ref_fasta, out_fasta,
                              outAlternateContigs=alt_fasta)

    self.assertEqualContents(out_fasta, expected_main)
    self.assertEqualContents(alt_fasta, expected_alt)
def test_ambig_align(self):
    # Copy the gzipped lasv contigs into a plain temp FASTA, order/orient them
    # against the ISTH2376 reference, and compare the list of output sequence
    # strings with the expected list (all records, in file order).
    input_dir = util.file.get_test_input_path(self)
    gz_path = os.path.join(input_dir, 'contigs.lasv.ambig.fasta.gz')
    contigs_fasta = util.file.mkstempfname('.fasta')
    with util.file.open_or_gzopen(gz_path, 'rb') as src, open(contigs_fasta, 'wb') as dst:
        shutil.copyfileobj(src, dst)

    expected_fasta = os.path.join(input_dir, 'expected.lasv.ambig.fasta')
    out_fasta = util.file.mkstempfname('.fasta')
    ref_fasta = os.path.join(input_dir, 'ref.lasv.ISTH2376.fasta')
    assembly.order_and_orient(contigs_fasta, ref_fasta, out_fasta)

    actual_seqs = [str(rec.seq) for rec in Bio.SeqIO.parse(out_fasta, 'fasta')]
    expected_seqs = [str(rec.seq) for rec in Bio.SeqIO.parse(expected_fasta, 'fasta')]
    self.assertEqual(actual_seqs, expected_seqs)
def test_ambig_align_ebov(self):
    # Copy the gzipped ebov contigs into a plain temp FASTA, order/orient them
    # against the makona_C15 reference, and compare the list of output
    # sequence strings with the expected list (all records, in file order).
    input_dir = util.file.get_test_input_path(self)
    gz_path = os.path.join(input_dir, 'contigs.ebov.ambig.fasta.gz')
    contigs_fasta = util.file.mkstempfname('.fasta')
    with util.file.open_or_gzopen(gz_path, 'rb') as src, open(contigs_fasta, 'wb') as dst:
        shutil.copyfileobj(src, dst)

    expected_fasta = os.path.join(input_dir, 'expected.ebov.ambig.fasta')
    out_fasta = util.file.mkstempfname('.fasta')
    ref_fasta = os.path.join(input_dir, 'ref.ebov.makona_C15.fasta')
    assembly.order_and_orient(contigs_fasta, ref_fasta, out_fasta)

    actual_seqs = [str(rec.seq) for rec in Bio.SeqIO.parse(out_fasta, 'fasta')]
    expected_seqs = [str(rec.seq) for rec in Bio.SeqIO.parse(expected_fasta, 'fasta')]
    self.assertEqual(actual_seqs, expected_seqs)