示例#1
0
 def test1000T(self):
     """Read the 1000T FASTA data and check that its tree file yields one tree."""
     sd = SequenceDataset()
     # Context managers close each input file even when parsing raises.
     with open(data_source_path('1000T.fasta'), 'rU') as seq_stream:
         sd.read(seq_stream, file_format='FASTA', datatype='DNA')
     with open(data_source_path('1000T.tree'), 'rU') as tree_stream:
         tree_list = read_and_encode_splits(sd.dataset, tree_stream)
     self.assertEqual(len(tree_list), 1)
示例#2
0
 def setUp(self):
     """Call ``set_up()`` and record the input and per-datatype paths used by the tests.

     Input files come from ``data_source_path``; the remaining paths are
     obtained from ``get_subdir`` and ``get_path``.
     """
     self.set_up()

     # Input data files.
     self.dna = data_source_path('small.fasta')
     self.rna = data_source_path('smallrna.fasta')
     self.tree = data_source_path('small.tree')

     # One sub-directory per data type.
     self.dna_tmp = self.get_subdir('dna')
     self.rna_tmp = self.get_subdir('rna')
     dna_dir = self.dna_tmp
     rna_dir = self.rna_tmp

     # Alignment and tree paths.
     self.dna_aln = self.get_path(name='.marker001.small.aln', parent_dir=dna_dir)
     self.dna_tree = self.get_path(name='.tre', parent_dir=dna_dir)
     self.rna_aln = self.get_path(name='.marker001.smallrna.aln', parent_dir=rna_dir)
     self.rna_tree = self.get_path(name='.tre', parent_dir=rna_dir)

     # Score paths.
     self.dna_score = self.get_path(name='.score.txt', parent_dir=dna_dir)
     self.rna_score = self.get_path(name='.score.txt', parent_dir=rna_dir)

     # Iteration-0 alignment paths share one file name.
     iteration_aln_name = '_temp_iteration_0_seq_alignment.txt'
     self.dna_tmp_aln = self.get_path(name=iteration_aln_name, parent_dir=dna_dir)
     self.rna_tmp_aln = self.get_path(name=iteration_aln_name, parent_dir=rna_dir)
示例#3
0
    def _impl_test_aligner(self, name, fn):
        """Run the aligner called ``name`` on ``fn`` and compare with references.

        The output is first compared with the reference file ``<name>.<fn>``.
        When that differs, the numbered alternatives ``<reference>.1``,
        ``<reference>.2``, ... are tried in order until one matches or the
        next numbered file does not exist, at which point the test fails
        against the last reference that was read.

        Returns ``True`` when a numbered alternative matches and ``None``
        otherwise (including when ``get_aligner`` returns ``None`` and the
        test is skipped).
        """
        filename = data_source_path(fn)
        alignment = Alignment()
        alignment.read_filepath(filename, 'FASTA')

        aln = self.get_aligner('%s' % name)
        if aln is None:
            # Logger.warn is a deprecated alias of Logger.warning.
            _LOG.warning("test%s skipped" % name)
            return
        a = aln.run(alignment,
                    tmp_dir_par=self.ts.top_level_temp,
                    delete_temps=True)

        reference_fn = data_source_path('%s.%s' % (name, fn))
        reference_aln = Alignment()
        reference_aln.read_filepath(reference_fn, 'FASTA')
        _LOG.debug('Checking results from %s against %s' % (name, reference_fn))
        if reference_aln != a:
            i = 1
            while True:
                nrfn = reference_fn + '.' + str(i)
                if not os.path.exists(nrfn):
                    # No alternatives left: report the mismatch.
                    self.assertEqual(reference_aln, a)
                    # Guarantee termination even if ``==`` and ``!=``
                    # disagree and the assertion above passes.
                    return
                reference_aln = Alignment()
                reference_aln.read_filepath(nrfn, 'FASTA')
                _LOG.debug('Checking results from %s against %s' % (name, nrfn))
                if reference_aln == a:
                    self.assertEqual(reference_aln, a)
                    return True
                i += 1
示例#4
0
 def setUp(self):
     """Create a temporary directory tree and load the MAFFT anolis alignment."""
     self.ts = TempFS()
     self.ts.create_top_level_temp(
         prefix='treeEstimatorTest',
         parent=os.curdir,
     )
     fasta_name = 'mafft.anolis.fasta'
     self.filename = data_source_path(fasta_name)
     self.alignment = loaded = Alignment()
     loaded.read_filepath(data_source_path(fasta_name), 'FASTA')
示例#5
0
    def _impl_test_aligner(self, name, fn):
        """Run the aligner called ``name`` on ``fn`` and compare with references.

        The output is first compared with the reference file ``<name>.<fn>``.
        When that differs, the numbered alternatives ``<reference>.1``,
        ``<reference>.2``, ... are tried in order until one matches or the
        next numbered file does not exist, at which point the test fails
        against the last reference that was read.

        Returns ``True`` when a numbered alternative matches and ``None``
        otherwise (including when ``get_aligner`` returns ``None`` and the
        test is skipped).
        """
        filename = data_source_path(fn)
        alignment = Alignment()
        alignment.read_filepath(filename, 'FASTA')

        aln = self.get_aligner('%s' % name)
        if aln is None:
            # Logger.warn is a deprecated alias of Logger.warning.
            _LOG.warning("test%s skipped" % name)
            return
        a = aln.run(alignment,
                    tmp_dir_par=self.ts.top_level_temp,
                    delete_temps=True)

        reference_fn = data_source_path('%s.%s' % (name, fn))
        reference_aln = Alignment()
        reference_aln.read_filepath(reference_fn, 'FASTA')
        _LOG.debug('Checking results from %s against %s' % (name, reference_fn))
        if reference_aln != a:
            i = 1
            while True:
                nrfn = reference_fn + '.' + str(i)
                if not os.path.exists(nrfn):
                    # No alternatives left: report the mismatch.
                    self.assertEqual(reference_aln, a)
                    # Guarantee termination even if ``==`` and ``!=``
                    # disagree and the assertion above passes.
                    return
                reference_aln = Alignment()
                reference_aln.read_filepath(nrfn, 'FASTA')
                _LOG.debug('Checking results from %s against %s' % (name, nrfn))
                if reference_aln == a:
                    self.assertEqual(reference_aln, a)
                    return True
                i += 1
示例#6
0
 def test1000T(self):
     """Read the 1000T FASTA data and check that its tree file yields one tree."""
     sd = SequenceDataset()
     # Context managers close each input file even when parsing raises.
     with open(data_source_path('1000T.fasta'), 'rU') as seq_stream:
         sd.read(seq_stream, file_format='FASTA', datatype='DNA')
     with open(data_source_path('1000T.tree'), 'rU') as tree_stream:
         tree_list = read_and_encode_splits(sd.dataset, tree_stream)
     self.assertEqual(len(tree_list), 1)
示例#7
0
 def setUp(self):
     """Create a temporary directory tree and load the MAFFT anolis alignment."""
     self.ts = TempFS()
     self.ts.create_top_level_temp(
         prefix='treeEstimatorTest',
         parent=os.curdir,
     )
     fasta_name = 'mafft.anolis.fasta'
     self.filename = data_source_path(fasta_name)
     self.alignment = loaded = Alignment()
     loaded.read_filepath(data_source_path(fasta_name), 'FASTA')
示例#8
0
 def testConcatenateAlignments(self):
     """Load ``small.fasta`` into two separate DNA alignments."""
     first_path = data_source_path('small.fasta')
     second_path = data_source_path('small.fasta')
     first_aln = Alignment()
     second_aln = Alignment()
     # The datatype is set on both alignments before either file is read.
     first_aln.datatype = "DNA"
     second_aln.datatype = "DNA"
     first_aln.read_filepath(first_path, 'FASTA')
     second_aln.read_filepath(second_path, 'FASTA')
示例#9
0
 def testCentroidEdge(self):
     """Read the 100T data, wrap its single tree and run the centroid check."""
     sd = SequenceDataset()
     # Context managers close each input file even when parsing raises.
     with open(data_source_path('100T.fasta'), 'rU') as seq_stream:
         sd.read(seq_stream, file_format='FASTA', datatype='DNA')
     with open(data_source_path('100T.tree'), 'rU') as tree_stream:
         tree_list = read_and_encode_splits(sd.dataset, tree_stream)
     self.assertEqual(len(tree_list), 1)
     t = PhylogeneticTree(tree_list[0])
     self._do_test_centroid(t)
示例#10
0
 def testCentroidEdge(self):
     """Read the 100T data, wrap its single tree and run the centroid check."""
     sd = SequenceDataset()
     # Context managers close each input file even when parsing raises.
     with open(data_source_path('100T.fasta'), 'rU') as seq_stream:
         sd.read(seq_stream, file_format='FASTA', datatype='DNA')
     with open(data_source_path('100T.tree'), 'rU') as tree_stream:
         tree_list = read_and_encode_splits(sd.dataset, tree_stream)
     self.assertEqual(len(tree_list), 1)
     t = PhylogeneticTree(tree_list[0])
     self._do_test_centroid(t)
示例#11
0
 def testConcatenateAlignments(self):
     """Load ``small.fasta`` into two separate DNA alignments."""
     first_path = data_source_path('small.fasta')
     second_path = data_source_path('small.fasta')
     first_aln = Alignment()
     second_aln = Alignment()
     # The datatype is set on both alignments before either file is read.
     first_aln.datatype = "DNA"
     second_aln.datatype = "DNA"
     first_aln.read_filepath(first_path, 'FASTA')
     second_aln.read_filepath(second_path, 'FASTA')
示例#12
0
 def setUp(self):
     """Call ``set_up()`` and record the RNA inputs and the paths from ``get_path``."""
     self.set_up()

     # Input data files.
     self.tiny_rna = data_source_path('tinyrna.fasta')
     self.small_rna = data_source_path('smallrna.fasta')
     self.small_tree = data_source_path('small.tree')

     # Paths requested from get_path, one call per attribute.
     self.tiny_aln_path = self.get_path('.marker001.tinyrna.aln')
     self.small_aln_path = self.get_path('.marker001.smallrna.aln')
     self.init_aln_path = self.get_path('_temp_iteration_initialsearch_seq_alignment.txt')
     self.iter_aln_path = self.get_path('_temp_iteration_0_seq_alignment.txt')
     self.cfg_path = self.get_path('_temp_sate_config.txt')
示例#13
0
    def _impl_test_tree_estimator(self, name, datatype, partitions, **kwargs):
        """Run the tree estimator called ``name`` on ``anolis.fasta``.

        ``num_cpus`` may be supplied as a keyword argument; it is forwarded
        to ``run`` only when it is truthy.  The test is skipped (with a
        logged warning) when ``get_tree_estimator`` returns ``None``.
        Nothing is asserted about the value returned by ``run``.
        """
        num_cpus = kwargs.get('num_cpus', None)
        filename = data_source_path('anolis.fasta')

        md = MultiLocusDataset()
        md.read_files(seq_filename_list=[filename], datatype=datatype)
        md.relabel_for_sate()
        te = self.get_tree_estimator(name)
        if te is None:
            # Logger.warn is a deprecated alias of Logger.warning.
            _LOG.warning("test%s skipped" % name)
            return

        # Build the call once instead of duplicating it in two branches.
        run_kwargs = dict(alignment=md,
                          partitions=partitions,
                          tmp_dir_par=self.ts.top_level_temp,
                          delete_temps=True)
        if num_cpus:
            run_kwargs['num_cpus'] = num_cpus
        te.run(**run_kwargs)
示例#14
0
    def testRaxml(self):
        """Load the MAFFT anolis alignment, then run the 'raxml' estimator
        test when SLOW tests are enabled."""
        aln_path = data_source_path('mafft.anolis.fasta')
        loaded = Alignment()
        loaded.read_filepath(aln_path, 'FASTA')

        if not is_test_enabled(TestLevel.SLOW, _LOG):
            return
        self._impl_test_tree_estimator(
            'raxml',
            datatype="DNA",
            partitions=[("DNA", 1, 1456)],
        )
示例#15
0
    def _impl_test_tree_estimator(self, name, datatype, partitions, **kwargs):
        """Run the tree estimator called ``name`` on ``anolis.fasta``.

        ``num_cpus`` may be supplied as a keyword argument; it is forwarded
        to ``run`` only when it is truthy.  The test is skipped (with a
        logged warning) when ``get_tree_estimator`` returns ``None``.
        Nothing is asserted about the value returned by ``run``.
        """
        num_cpus = kwargs.get('num_cpus', None)
        filename = data_source_path('anolis.fasta')

        md = MultiLocusDataset()
        md.read_files(seq_filename_list=[filename], datatype=datatype)
        md.relabel_for_sate()
        te = self.get_tree_estimator(name)
        if te is None:
            # Logger.warn is a deprecated alias of Logger.warning.
            _LOG.warning("test%s skipped" % name)
            return

        # Build the call once instead of duplicating it in two branches.
        run_kwargs = dict(alignment=md,
                          partitions=partitions,
                          tmp_dir_par=self.ts.top_level_temp,
                          delete_temps=True)
        if num_cpus:
            run_kwargs['num_cpus'] = num_cpus
        te.run(**run_kwargs)
示例#16
0
 def testDiagnoseBogus(self):
     """Check that parsing ``caenophidia_mos_bogus.fasta`` raises an exception."""
     fp = data_source_path('caenophidia_mos_bogus.fasta')
     self.assertRaises(Exception,
                       summary_stats_from_parse, [fp],
                       ["DNA", "RNA", "PROTEIN"],
                       careful_parse=False)
     # Logger.warn is a deprecated alias of Logger.warning.
     _LOG.warning(
         "WARNING: summary_stats_from_parse does not distinguish between all bogus sequences in 'careful' mode"
     )
示例#17
0
 def testAlignment(self):
     """Read ``small.fasta``, check its taxon count and write derived files.

     NOTE(review): the output paths are built by appending suffixes to the
     input file path and nothing here removes them -- confirm that writing
     into the data directory is intended.
     """
     filename = data_source_path('small.fasta')
     alignment = Alignment()
     alignment.read_filepath(filename, 'FASTA')
     num_taxa = alignment.get_num_taxa()
     self.assertEqual(num_taxa, 32)
     alignment.write_filepath(filename + '.phy', 'PHYLIP')
     alignment.write_unaligned_fasta(filename + '.raw')
     # list() keeps the slice valid when keys() returns a non-sliceable view
     # (as dict.keys() does on Python 3); for a list it is just a copy.
     first_two = list(alignment.keys())[0:2]
     alignment.sub_alignment(first_two).write_unaligned_fasta(filename + '.partial.raw')
示例#18
0
 def testMulti(self):
     """Run the main entry point on the ``testmulti`` data when EXHAUSTIVE
     tests are enabled for this test."""
     # "<ClassName>.<method name>"; the frame lookup must stay in this method.
     qualified_name = ".".join([self.__class__.__name__,
                                sys._getframe().f_code.co_name])
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG,
                            module_name=qualified_name):
         return
     argv = [
         '-m',
         '-i', data_source_path('testmulti'),
         '-o', self.ts.top_level_temp,
         '--temporaries=%s' % self.ts.top_level_temp,
         '-j', self.job_name,
         '--iter-limit=1',
     ]
     self._main_execution(argv)
示例#19
0
    def testLongestBipartition(self):
        """Split ``small.tree`` on its longest edge and check both leaf counts."""
        tree_path = data_source_path('small.tree')
        pt = self.phylogeneticTreeFromFile(tree_path, file_format='NEWICK')
        self.assertEqual(pt.n_leaves, 32)

        longest_edge = pt.get_longest_edge()
        part_a, part_b = pt.bipartition_by_edge(longest_edge)

        # Sorted so the check does not depend on which side comes first.
        leaf_counts = sorted([part_a.n_leaves, part_b.n_leaves])
        self.assertEqual(leaf_counts, [1, 31])
    def testLongestBipartition(self):
        """Split ``small.tree`` on its longest edge and check both leaf counts."""
        tree_path = data_source_path('small.tree')
        pt = self.phylogeneticTreeFromFile(tree_path, file_format='NEWICK')
        self.assertEqual(pt.n_leaves, 32)

        longest_edge = pt.get_longest_edge()
        part_a, part_b = pt.bipartition_by_edge(longest_edge)

        # Sorted so the check does not depend on which side comes first.
        leaf_counts = sorted([part_a.n_leaves, part_b.n_leaves])
        self.assertEqual(leaf_counts, [1, 31])
示例#21
0
 def testAlignment(self):
     """Read ``small.fasta``, check its taxon count and write derived files.

     NOTE(review): the output paths are built by appending suffixes to the
     input file path and nothing here removes them -- confirm that writing
     into the data directory is intended.
     """
     filename = data_source_path('small.fasta')
     alignment = Alignment()
     alignment.read_filepath(filename, 'FASTA')
     num_taxa = alignment.get_num_taxa()
     self.assertEqual(num_taxa, 32)
     alignment.write_filepath(filename + '.phy', 'PHYLIP')
     alignment.write_unaligned_fasta(filename + '.raw')
     # list() keeps the slice valid when keys() returns a non-sliceable view
     # (as dict.keys() does on Python 3); for a list it is just a copy.
     first_two = list(alignment.keys())[0:2]
     alignment.sub_alignment(first_two).write_unaligned_fasta(filename + '.partial.raw')
示例#22
0
    def _impl_test_merger(self, name):
        """Merge ``merger1.fasta`` and ``merger2.fasta`` with the merger
        ``'<name> merger'`` and compare the result with ``merger_result.fasta``.

        The test is skipped (with a logged warning) when ``get_merger``
        returns ``None``.
        """
        filename = data_source_path('merger1.fasta')
        alignment1 = Alignment()
        alignment1.read_filepath(filename, 'FASTA')
        filename = data_source_path('merger2.fasta')
        alignment2 = Alignment()
        alignment2.read_filepath(filename, 'FASTA')

        aln = self.get_merger('%s merger' % name)
        if aln is None:
            # Logger.warn is a deprecated alias of Logger.warning.
            _LOG.warning("test%s skipped" % name)
            return
        a = aln.run(alignment1,
                    alignment2,
                    tmp_dir_par=self.ts.top_level_temp,
                    delete_temps=True)

        reference_fn = data_source_path('merger_result.fasta')
        reference_aln = Alignment()
        reference_aln.read_filepath(reference_fn, 'FASTA')
        # assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(reference_aln, a)
示例#23
0
    def testCentroidBipartition(self):
        """Split ``diffDecomp.nex`` on its centroid edge and check both leaf counts."""
        nexus_path = data_source_path('diffDecomp.nex')
        pt = self.phylogeneticTreeFromFile(nexus_path, file_format='NEXUS')
        # pt.add_n_leaf_des_attr()
        self.assertEqual(pt.n_leaves, 484)

        centroid_edge = pt.get_centroid_edge()
        part_a, part_b = pt.bipartition_by_edge(centroid_edge)

        # Sorted so the check does not depend on which side comes first.
        leaf_counts = sorted([part_a.n_leaves, part_b.n_leaves])
        self.assertEqual(leaf_counts, [231, 253])
    def testCentroidBipartition(self):
        """Split ``diffDecomp.nex`` on its centroid edge and check both leaf counts."""
        nexus_path = data_source_path('diffDecomp.nex')
        pt = self.phylogeneticTreeFromFile(nexus_path, file_format='NEXUS')
        # pt.add_n_leaf_des_attr()
        self.assertEqual(pt.n_leaves, 484)

        centroid_edge = pt.get_centroid_edge()
        part_a, part_b = pt.bipartition_by_edge(centroid_edge)

        # Sorted so the check does not depend on which side comes first.
        leaf_counts = sorted([part_a.n_leaves, part_b.n_leaves])
        self.assertEqual(leaf_counts, [231, 253])
示例#25
0
    def _impl_test_merger(self, name):
        """Merge ``merger1.fasta`` and ``merger2.fasta`` with the merger
        ``'<name> merger'`` and compare the result with ``merger_result.fasta``.

        The test is skipped (with a logged warning) when ``get_merger``
        returns ``None``.
        """
        filename = data_source_path('merger1.fasta')
        alignment1 = Alignment()
        alignment1.read_filepath(filename, 'FASTA')
        filename = data_source_path('merger2.fasta')
        alignment2 = Alignment()
        alignment2.read_filepath(filename, 'FASTA')

        aln = self.get_merger('%s merger' % name)
        if aln is None:
            # Logger.warn is a deprecated alias of Logger.warning.
            _LOG.warning("test%s skipped" % name)
            return
        a = aln.run(alignment1,
                    alignment2,
                    tmp_dir_par=self.ts.top_level_temp,
                    delete_temps=True)

        reference_fn = data_source_path('merger_result.fasta')
        reference_aln = Alignment()
        reference_aln.read_filepath(reference_fn, 'FASTA')
        # assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(reference_aln, a)
示例#26
0
 def _impl_test_tree_estimator(self, name, datatype, partitions):
     """Run the tree estimator called ``name`` on the ``anolis.fasta`` alignment.

     The test is skipped (with a logged warning) when
     ``get_tree_estimator`` returns ``None``.  Nothing is asserted about
     the value returned by ``run``.
     """
     filename = data_source_path('anolis.fasta')
     alignment = Alignment()
     alignment.read_filepath(filename, 'FASTA')
     te = self.get_tree_estimator(name)
     if te is None:
         # Logger.warn is a deprecated alias of Logger.warning.
         _LOG.warning("test%s skipped" % name)
         return
     alignment.datatype = datatype
     # The result was bound to an unused local; only completion matters here.
     te.run(alignment=alignment,
            partitions=partitions,
            tmp_dir_par=self.ts.top_level_temp,
            delete_temps=True)
示例#27
0
    def testDiagnoseMulti(self):
        """Check ``summary_stats_from_parse`` on several two-file inputs.

        Four file pairs are parsed with ``careful_parse=False`` and the first
        four elements of each result are compared with fixed expected values.
        A protein/RNA pair is expected to raise.
        """
        multi_dir = data_source_path('testmulti/caenophidia')
        fp = os.path.join(multi_dir, 'caenophidia_mos.fasta')
        fp2 = os.path.join(multi_dir, 'caenophidia_mos2.fasta')
        s = summary_stats_from_parse([fp, fp2], ["DNA", "RNA", "PROTEIN"],
                                     careful_parse=False)
        self.assertEqual(s[0], "PROTEIN")
        self.assertEqual(s[1], [(114, 189), (109, 202)])
        # two taxa names were changed and 5 were deleted, so the union is 116
        self.assertEqual(s[2], 116)
        self.assertEqual(s[3], False)

        fp3 = data_source_path('smallrna.fasta')
        s = summary_stats_from_parse([fp3, fp3], ["DNA", "RNA", "PROTEIN"],
                                     careful_parse=False)
        self.assertEqual(s[0], "RNA")
        self.assertEqual(s[1], [(32, 1650), (32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], True)

        # This check and its warning were pasted verbatim after each of the
        # following stanzas; the arguments never changed, so run it once.
        self.assertRaises(Exception,
                          summary_stats_from_parse, [fp, fp3],
                          ["DNA", "RNA", "PROTEIN"],
                          careful_parse=False)
        # Logger.warn is a deprecated alias of Logger.warning.
        _LOG.warning(
            "WARNING: summary_stats_from_parse will read multi with dna and protein as entirely protein. MIXED data type support is needed!"
        )

        fp4 = data_source_path('small.fasta')
        fp5 = data_source_path('smallunaligned.fasta')

        # The same DNA file twice.
        s = summary_stats_from_parse([fp4, fp4], ["DNA", "RNA", "PROTEIN"],
                                     careful_parse=False)
        self.assertEqual(s[0], "DNA")
        self.assertEqual(s[1], [(32, 1650), (32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], True)

        # The DNA file paired with smallunaligned.fasta.
        s = summary_stats_from_parse([fp4, fp5], ["DNA", "RNA", "PROTEIN"],
                                     careful_parse=False)
        self.assertEqual(s[0], "DNA")
        self.assertEqual(s[1], [(32, 1650), (32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], False)
示例#28
0
 def setUp(self):
     """Call ``set_up()`` and record the paths of the data files and directories."""
     self.set_up()

     # Single-file inputs.
     self.anolis_file = data_source_path('anolis.fasta')
     self.caenophidia_file = data_source_path('caenophidia_mos.fasta')

     # Multi-locus directory and its sub-directories.
     multi_root = data_source_path('testmulti/')
     self.multi_dir = multi_root
     self.multi_aa_dir = os.path.join(multi_root, 'caenophidia')
     self.figwasp_dir = os.path.join(multi_root, 'figwasps')
     self.hummingbird_dir = os.path.join(multi_root, 'hummingbirds')

     # Inputs whose file names mark them as containing ambiguities, with trees.
     self.ambig_dna = data_source_path('small.ambiguities.fasta')
     self.ambig_dna_tree = data_source_path('small.tree')
     self.ambig_aa = data_source_path('caenophidia_mos.ambiguities.fasta')
     self.ambig_aa_tree = data_source_path('caenophidia_mos.tre')
示例#29
0
 def setUp(self):
     """Call ``set_up()`` and record the mixed DNA/RNA inputs and the paths
     from ``get_path``."""
     self.set_up()

     # Inputs live under testmulti/mixed.
     multi_root = data_source_path('testmulti/')
     self.multi_dir = multi_root
     mixed_dir = os.path.join(multi_root, 'mixed')
     self.multi_mixed_dir = mixed_dir
     self.in_path1 = os.path.join(mixed_dir, 'tinydna.fasta')
     self.in_path2 = os.path.join(mixed_dir, 'tinyrna.fasta')

     # Paths requested from get_path, one call per attribute.
     self.aln_path1 = self.get_path('.marker001.tinydna.aln')
     self.aln_path2 = self.get_path('.marker002.tinyrna.aln')
     self.cfg_path = self.get_path('_temp_sate_config.txt')
     self.concat_path = self.get_path('_temp_iteration_0_seq_alignment.txt')
示例#30
0
 def setUp(self):
     """Copy ``tiny.fasta`` to a path built from a non-ASCII name.

     The same name is passed to ``get_subdir`` for the directory and, with
     a ``.fasta`` suffix, to ``get_path`` for the file.
     """
     self.set_up()
     data_file = data_source_path('tiny.fasta')
     unicode_name = u'm\xe9ss\xfdp\xe4th'
     self.tmp_sub_dir = self.get_subdir(unicode_name)
     self.data_path = self.get_path(
             name=unicode_name + '.fasta',
             parent_dir=self.tmp_sub_dir)
     # Nested ``with`` blocks close both files even if the copy fails
     # part-way or the destination cannot be opened.
     with open(data_file, 'rU') as src:
         with open(self.data_path, 'w') as out:
             for line in src:
                 out.write(line)
示例#31
0
 def setUp(self):
     """Copy ``tiny.fasta`` to a path built from a name containing spaces.

     The same name is passed to ``get_subdir`` for the directory and, with
     a ``.fasta`` suffix, to ``get_path`` for the file.
     """
     self.set_up()
     data_file = data_source_path('tiny.fasta')
     space_name = 'a path with a lot of spaces'
     self.tmp_sub_dir = self.get_subdir(space_name)
     self.data_path = self.get_path(
             name=space_name + '.fasta',
             parent_dir=self.tmp_sub_dir)
     # Nested ``with`` blocks close both files even if the copy fails
     # part-way or the destination cannot be opened.
     with open(data_file, 'rU') as src:
         with open(self.data_path, 'w') as out:
             for line in src:
                 out.write(line)
示例#32
0
 def testDiagnoseProt(self):
     """Check that ``caenophidia_mos.fasta`` is diagnosed as PROTEIN in both
     parse modes, and that parsing raises when PROTEIN is not allowed."""
     fp = data_source_path('caenophidia_mos.fasta')
     # Parenthesised so the line is valid on Python 3; with a single
     # argument the Python 2 print statement produces the same output.
     print(fp)
     for careful in (False, True):
         s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"],
                                      careful_parse=careful)
         self.assertEqual(s[0], "PROTEIN")
         self.assertEqual(s[1], [(114, 189)])
         self.assertEqual(s[2], 114)
         self.assertEqual(s[3], False)
     for careful in (False, True):
         self.assertRaises(Exception,
                           summary_stats_from_parse, [fp], ["DNA", "RNA"],
                           careful_parse=careful)
示例#33
0
 def testDiagnoseRNA(self):
     """Check that ``smallrna.fasta`` is diagnosed as RNA in both parse modes.

     Parsing with RNA excluded is only expected to raise with
     ``careful_parse=False``; the logged warning records the limitation of
     the careful mode.
     """
     fp = data_source_path('smallrna.fasta')
     # Parenthesised so the line is valid on Python 3; with a single
     # argument the Python 2 print statement produces the same output.
     print(fp)
     for careful in (False, True):
         s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"],
                                      careful_parse=careful)
         self.assertEqual(s[0], "RNA")
         self.assertEqual(s[1], [(32, 1650)])
         self.assertEqual(s[2], 32)
         self.assertEqual(s[3], True)
     self.assertRaises(Exception,
                       summary_stats_from_parse, [fp], ["DNA", "PROTEIN"],
                       careful_parse=False)
     # Logger.warn is a deprecated alias of Logger.warning.
     _LOG.warning(
         "WARNING: summary_stats_from_parse does not distinguish between RNA and DNA in 'careful' mode"
     )
示例#34
0
 def testDiagnoseDNA(self):
     """Check that ``small.fasta`` is diagnosed as DNA in both parse modes,
     and that parsing raises when only RNA is allowed."""
     fp = data_source_path('small.fasta')
     # Parenthesised so the line is valid on Python 3; with a single
     # argument the Python 2 print statement produces the same output.
     print(fp)
     for careful in (False, True):
         s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"],
                                      careful_parse=careful)
         self.assertEqual(s[0], "DNA")
         self.assertEqual(s[1], [(32, 1650)])
         self.assertEqual(s[2], 32)
         self.assertEqual(s[3], True)
     for careful in (False, True):
         self.assertRaises(Exception,
                           summary_stats_from_parse, [fp], ["RNA"],
                           careful_parse=careful)
示例#35
0
 def testDiagnoseDNA(self):
     """Check that ``small.fasta`` is diagnosed as DNA in both parse modes,
     and that parsing raises when only RNA is allowed."""
     fp = data_source_path('small.fasta')
     # Parenthesised so the line is valid on Python 3; with a single
     # argument the Python 2 print statement produces the same output.
     print(fp)
     for careful in (False, True):
         s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"],
                                      careful_parse=careful)
         self.assertEqual(s[0], "DNA")
         self.assertEqual(s[1], [(32, 1650)])
         self.assertEqual(s[2], 32)
         self.assertEqual(s[3], True)
     for careful in (False, True):
         self.assertRaises(Exception,
                           summary_stats_from_parse, [fp], ["RNA"],
                           careful_parse=careful)
示例#36
0
 def testDiagnoseRNA(self):
     """Check that ``smallrna.fasta`` is diagnosed as RNA in both parse modes.

     Parsing with RNA excluded is only expected to raise with
     ``careful_parse=False``; the logged warning records the limitation of
     the careful mode.
     """
     fp = data_source_path('smallrna.fasta')
     # Parenthesised so the line is valid on Python 3; with a single
     # argument the Python 2 print statement produces the same output.
     print(fp)
     for careful in (False, True):
         s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"],
                                      careful_parse=careful)
         self.assertEqual(s[0], "RNA")
         self.assertEqual(s[1], [(32, 1650)])
         self.assertEqual(s[2], 32)
         self.assertEqual(s[3], True)
     self.assertRaises(Exception,
                       summary_stats_from_parse, [fp], ["DNA", "PROTEIN"],
                       careful_parse=False)
     # Logger.warn is a deprecated alias of Logger.warning.
     _LOG.warning(
         "WARNING: summary_stats_from_parse does not distinguish between RNA and DNA in 'careful' mode"
     )
示例#37
0
 def testDiagnoseProt(self):
     """Check that ``caenophidia_mos.fasta`` is diagnosed as PROTEIN in both
     parse modes, and that parsing raises when PROTEIN is not allowed."""
     fp = data_source_path('caenophidia_mos.fasta')
     # Parenthesised so the line is valid on Python 3; with a single
     # argument the Python 2 print statement produces the same output.
     print(fp)
     for careful in (False, True):
         s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"],
                                      careful_parse=careful)
         self.assertEqual(s[0], "PROTEIN")
         self.assertEqual(s[1], [(114, 189)])
         self.assertEqual(s[2], 114)
         self.assertEqual(s[3], False)
     for careful in (False, True):
         self.assertRaises(Exception,
                           summary_stats_from_parse, [fp], ["DNA", "RNA"],
                           careful_parse=careful)
示例#38
0
    def testDiagnoseMulti(self):
        """Check ``summary_stats_from_parse`` on several two-file inputs.

        Four file pairs are parsed with ``careful_parse=False`` and the first
        four elements of each result are compared with fixed expected values.
        A protein/RNA pair is expected to raise.
        """
        multi_dir = data_source_path('testmulti/caenophidia')
        fp = os.path.join(multi_dir, 'caenophidia_mos.fasta')
        fp2 = os.path.join(multi_dir, 'caenophidia_mos2.fasta')
        s = summary_stats_from_parse([fp, fp2], ["DNA", "RNA", "PROTEIN"],
                                     careful_parse=False)
        self.assertEqual(s[0], "PROTEIN")
        self.assertEqual(s[1], [(114, 189), (109, 202)])
        # two taxa names were changed and 5 were deleted, so the union is 116
        self.assertEqual(s[2], 116)
        self.assertEqual(s[3], False)

        fp3 = data_source_path('smallrna.fasta')
        s = summary_stats_from_parse([fp3, fp3], ["DNA", "RNA", "PROTEIN"],
                                     careful_parse=False)
        self.assertEqual(s[0], "RNA")
        self.assertEqual(s[1], [(32, 1650), (32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], True)

        # This check and its warning were pasted verbatim after each of the
        # following stanzas; the arguments never changed, so run it once.
        self.assertRaises(Exception,
                          summary_stats_from_parse, [fp, fp3],
                          ["DNA", "RNA", "PROTEIN"],
                          careful_parse=False)
        # Logger.warn is a deprecated alias of Logger.warning.
        _LOG.warning(
            "WARNING: summary_stats_from_parse will read multi with dna and protein as entirely protein. MIXED data type support is needed!"
        )

        fp4 = data_source_path('small.fasta')
        fp5 = data_source_path('smallunaligned.fasta')

        # The same DNA file twice.
        s = summary_stats_from_parse([fp4, fp4], ["DNA", "RNA", "PROTEIN"],
                                     careful_parse=False)
        self.assertEqual(s[0], "DNA")
        self.assertEqual(s[1], [(32, 1650), (32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], True)

        # The DNA file paired with smallunaligned.fasta.
        s = summary_stats_from_parse([fp4, fp5], ["DNA", "RNA", "PROTEIN"],
                                     careful_parse=False)
        self.assertEqual(s[0], "DNA")
        self.assertEqual(s[1], [(32, 1650), (32, 1650)])
        self.assertEqual(s[2], 32)
        self.assertEqual(s[3], False)
示例#39
0
 def testDiagnoseBogus(self):
     """Check that parsing ``caenophidia_mos_bogus.fasta`` raises an exception."""
     fp = data_source_path('caenophidia_mos_bogus.fasta')
     self.assertRaises(Exception,
                       summary_stats_from_parse, [fp],
                       ["DNA", "RNA", "PROTEIN"],
                       careful_parse=False)
     # Logger.warn is a deprecated alias of Logger.warning.
     _LOG.warning(
         "WARNING: summary_stats_from_parse does not distinguish between all bogus sequences in 'careful' mode"
     )
示例#40
0
 def testMulti(self):
     """Run the main entry point on the ``testmulti`` data when EXHAUSTIVE
     tests are enabled."""
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG):
         return
     argv = [
         '-m',
         '-i', data_source_path('testmulti'),
         '--iter-limit=1',
     ]
     self._main_execution(argv)
示例#41
0
 def testDNAFasta(self):
     """Read ``anolis.fasta`` as DNA; the test passes if reading does not raise."""
     sd = SequenceDataset()
     fp = data_source_path('anolis.fasta')
     # The context manager closes the file even when reading raises.
     with open(fp, 'rU') as seq_stream:
         sd.read(seq_stream, file_format='FASTA', datatype='DNA')
示例#42
0
 def testDNAFasta(self):
     """Read ``anolis.fasta`` as DNA; the test passes if reading does not raise."""
     sd = SequenceDataset()
     fp = data_source_path('anolis.fasta')
     # The context manager closes the file even when reading raises.
     with open(fp, 'rU') as seq_stream:
         sd.read(seq_stream, file_format='FASTA', datatype='DNA')
示例#43
0
 def setUp(self):
     """Call ``set_up()`` and record the ``tiny.fasta`` data path and the
     ``tiny_name_mismatch.tre`` tree path."""
     self.set_up()
     fasta_path = data_source_path('tiny.fasta')
     self.data = fasta_path
     tree_path = data_source_path('tiny_name_mismatch.tre')
     self.tree = tree_path
示例#44
0
 def setUp(self):
     """Call ``set_up()`` and record the path of ``tiny.lowercase.fasta``."""
     # Shared fixture initialisation provided elsewhere on this class.
     self.set_up()
     # Input file resolved through data_source_path.
     self.data = data_source_path('tiny.lowercase.fasta')