Exemplo n.º 1
0
    def _impl_test_tree_estimator(self, name, datatype, partitions, **kwargs):
        """Run the tree estimator looked up by ``name`` on ``anolis.fasta``.

        The file returned by ``data_source_path('anolis.fasta')`` is read into
        a fresh ``MultiLocusDataset`` with the given ``datatype`` and then
        relabelled with ``relabel_for_sate()``.  The estimator is obtained
        from ``self.get_tree_estimator(name)``; when that returns ``None`` a
        warning is logged and the method returns without running anything.

        Keyword arguments:
            num_cpus -- optional; forwarded to ``te.run`` only when truthy.
        """
        num_cpus = kwargs.get('num_cpus', None)
        filename = data_source_path('anolis.fasta')

        md = MultiLocusDataset()
        md.read_files(seq_filename_list=[filename],
                      datatype=datatype)
        md.relabel_for_sate()

        te = self.get_tree_estimator(name)
        if te is None:
            # ``warning`` is the supported spelling; ``warn`` is a deprecated
            # alias on standard-library loggers.
            _LOG.warning("test%s skipped" % name)
            return

        # Build the arguments once so the two call variants cannot drift
        # apart; ``num_cpus`` is only passed when the caller supplied one.
        run_kwargs = {
            'alignment': md,
            'partitions': partitions,
            'tmp_dir_par': self.ts.top_level_temp,
            'delete_temps': True,
        }
        if num_cpus:
            run_kwargs['num_cpus'] = num_cpus
        te.run(**run_kwargs)
Exemplo n.º 2
0
 def setUp(self):
     """Prepare the fixture used by each test.

     Calls ``set_up()``, creates a temp subdirectory under
     ``self.ts.top_level_temp``, derives the FASTA path inside it from
     ``self.job_name`` and instantiates an empty ``MultiLocusDataset``.
     """
     self.set_up()
     temp_fs = self.ts
     self.tmp_sub_dir = temp_fs.create_temp_subdir(
         parent=temp_fs.top_level_temp,
         prefix='MultiLocusDatasetTest')
     fasta_name = self.job_name + '_test.fasta'
     self.data_path = os.path.join(self.tmp_sub_dir, fasta_name)
     self.mlds = MultiLocusDataset()
Exemplo n.º 3
0
class MultiLocusDatasetTest(SateTestCase):
    """Exercise RNA <-> DNA conversion on a ``MultiLocusDataset``.

    Each test works on a FASTA file written into a temp subdirectory that is
    created in ``setUp`` and removed in ``tearDown``.
    """

    def setUp(self):
        """Create the temp subdirectory, the FASTA path and an empty dataset."""
        self.set_up()
        self.tmp_sub_dir = self.ts.create_temp_subdir(
                parent=self.ts.top_level_temp,
                prefix='MultiLocusDatasetTest')
        self.data_path = os.path.join(self.tmp_sub_dir,
                self.job_name + '_test.fasta')
        self.mlds = MultiLocusDataset()

    def tearDown(self):
        """Register files, remove the temp subdirectory, then ``tear_down()``."""
        self.register_files()
        self.ts.remove_dir(self.tmp_sub_dir)
        self.tear_down()

    def _create_seq_file(self, seq_str):
        """Write ``seq_str`` to ``self.data_path``, truncating any old content."""
        # The context manager closes the handle even when write() raises,
        # which the previous explicit open()/close() pair did not guarantee.
        with open(self.data_path, 'w') as out:
            out.write(seq_str)

    def _parse_seq_dataset(self, sd):
        """Return ``{taxon.label: sequence string}`` for ``sd``'s first matrix.

        Only ``sd.dataset.char_matrices[0]`` is inspected; each character
        vector is joined into a single string.
        """
        d = {}
        for taxon, char_vec in sd.dataset.char_matrices[0].iteritems():
            d[taxon.label] = ''.join(char_vec)
        return d

    def testRNAConversion(self):
        # Round-trip check: RNA -> DNA -> (DNA again) -> RNA -> DNA, comparing
        # the dataset against an independently converted copy at each step.
        sf = StringIO()
        sf.write('>a\nAUGCAUGC\n')
        sf.write('>b\nAUGCAUGC\n')
        self._create_seq_file(sf.getvalue())
        # Rewind so the same buffer can be parsed as the expected data.
        sf.seek(0)
        seqs = self.parse_fasta_file(sf)
        self.mlds.read_files([self.data_path],
                datatype='RNA',
                file_format='FASTA')
        self.assertEqual(len(self.mlds), 1)
        self.assertSameDataSet([seqs, self._parse_seq_dataset(self.mlds[0])])

        # RNA -> DNA: every expected sequence must now contain 'T'.
        self.mlds.convert_rna_to_dna()
        seqs = self.convert_rna_to_dna(seqs)
        for k, v in seqs.iteritems():
            self.assertTrue('T' in v)
        self.assertSameDataSet([seqs,
                self._parse_seq_dataset(self.mlds[0])])

        # Converting a second time must leave the data unchanged.
        self.mlds.convert_rna_to_dna()
        self.assertSameDataSet([seqs,
                self._parse_seq_dataset(self.mlds[0])])

        # DNA -> RNA: no expected sequence may contain 'T' any more.
        self.mlds.convert_dna_to_rna()
        seqs = self.convert_rna_to_dna(seqs, reverse=True)
        for k, v in seqs.iteritems():
            self.assertFalse('T' in v)
        self.assertSameDataSet([seqs,
                self._parse_seq_dataset(self.mlds[0])])

        # And back to DNA once more.
        self.mlds.convert_rna_to_dna()
        seqs = self.convert_rna_to_dna(seqs)
        for k, v in seqs.iteritems():
            self.assertTrue('T' in v)
        self.assertSameDataSet([seqs,
                self._parse_seq_dataset(self.mlds[0])])
Exemplo n.º 4
0
def read_input_sequences(seq_filename_list, datatype, missing=None):
    """Read sequence files into a new ``MultiLocusDataset`` and return it.

    ``seq_filename_list``, ``datatype`` and ``missing`` are passed through
    unchanged as keyword arguments to ``MultiLocusDataset.read_files``.
    """
    dataset = MultiLocusDataset()
    read_options = {
        'seq_filename_list': seq_filename_list,
        'datatype': datatype,
        'missing': missing,
    }
    dataset.read_files(**read_options)
    return dataset
Exemplo n.º 5
0
class MultiLocusDatasetTest(SateTestCase):
    """Exercise RNA <-> DNA conversion on a ``MultiLocusDataset``.

    Each test works on a FASTA file written into a temp subdirectory that is
    created in ``setUp`` and removed in ``tearDown``.
    """

    def setUp(self):
        """Create the temp subdirectory, the FASTA path and an empty dataset."""
        self.set_up()
        self.tmp_sub_dir = self.ts.create_temp_subdir(
            parent=self.ts.top_level_temp, prefix='MultiLocusDatasetTest')
        self.data_path = os.path.join(self.tmp_sub_dir,
                                      self.job_name + '_test.fasta')
        self.mlds = MultiLocusDataset()

    def tearDown(self):
        """Register files, remove the temp subdirectory, then ``tear_down()``."""
        self.register_files()
        self.ts.remove_dir(self.tmp_sub_dir)
        self.tear_down()

    def _create_seq_file(self, seq_str):
        """Write ``seq_str`` to ``self.data_path``, truncating any old content."""
        # The context manager closes the handle even when write() raises,
        # which the previous explicit open()/close() pair did not guarantee.
        with open(self.data_path, 'w') as out:
            out.write(seq_str)

    def _parse_seq_dataset(self, sd):
        """Return ``{taxon.label: sequence string}`` for ``sd``'s first matrix.

        Only ``sd.dataset.char_matrices[0]`` is inspected; each character
        vector is joined into a single string.
        """
        d = {}
        for taxon, char_vec in sd.dataset.char_matrices[0].iteritems():
            d[taxon.label] = ''.join(char_vec)
        return d

    def testRNAConversion(self):
        # Round-trip check: RNA -> DNA -> (DNA again) -> RNA -> DNA, comparing
        # the dataset against an independently converted copy at each step.
        sf = StringIO()
        sf.write('>a\nAUGCAUGC\n')
        sf.write('>b\nAUGCAUGC\n')
        self._create_seq_file(sf.getvalue())
        # Rewind so the same buffer can be parsed as the expected data.
        sf.seek(0)
        seqs = self.parse_fasta_file(sf)
        self.mlds.read_files([self.data_path],
                             datatype='RNA',
                             file_format='FASTA')
        self.assertEqual(len(self.mlds), 1)
        self.assertSameDataSet([seqs, self._parse_seq_dataset(self.mlds[0])])

        # RNA -> DNA: every expected sequence must now contain 'T'.
        self.mlds.convert_rna_to_dna()
        seqs = self.convert_rna_to_dna(seqs)
        for k, v in seqs.iteritems():
            self.assertTrue('T' in v)
        self.assertSameDataSet([seqs, self._parse_seq_dataset(self.mlds[0])])

        # Converting a second time must leave the data unchanged.
        self.mlds.convert_rna_to_dna()
        self.assertSameDataSet([seqs, self._parse_seq_dataset(self.mlds[0])])

        # DNA -> RNA: no expected sequence may contain 'T' any more.
        self.mlds.convert_dna_to_rna()
        seqs = self.convert_rna_to_dna(seqs, reverse=True)
        for k, v in seqs.iteritems():
            self.assertFalse('T' in v)
        self.assertSameDataSet([seqs, self._parse_seq_dataset(self.mlds[0])])

        # And back to DNA once more.
        self.mlds.convert_rna_to_dna()
        seqs = self.convert_rna_to_dna(seqs)
        for k, v in seqs.iteritems():
            self.assertTrue('T' in v)
        self.assertSameDataSet([seqs, self._parse_seq_dataset(self.mlds[0])])
Exemplo n.º 6
0
 def setUp(self):
     """Build the per-test fixture.

     After ``set_up()``, a temp subdirectory is created beneath
     ``self.ts.top_level_temp``; ``self.data_path`` points at
     ``<job_name>_test.fasta`` inside it and ``self.mlds`` is a new,
     empty ``MultiLocusDataset``.
     """
     self.set_up()
     subdir_options = {
         'parent': self.ts.top_level_temp,
         'prefix': 'MultiLocusDatasetTest',
     }
     self.tmp_sub_dir = self.ts.create_temp_subdir(**subdir_options)
     fasta_basename = self.job_name + '_test.fasta'
     self.data_path = os.path.join(self.tmp_sub_dir, fasta_basename)
     self.mlds = MultiLocusDataset()
Exemplo n.º 7
0
    def _impl_test_tree_estimator(self, name, datatype, partitions, **kwargs):
        """Run the tree estimator looked up by ``name`` on ``anolis.fasta``.

        The file returned by ``data_source_path('anolis.fasta')`` is read into
        a fresh ``MultiLocusDataset`` with the given ``datatype`` and then
        relabelled with ``relabel_for_sate()``.  The estimator is obtained
        from ``self.get_tree_estimator(name)``; when that returns ``None`` a
        warning is logged and the method returns without running anything.

        Keyword arguments:
            num_cpus -- optional; forwarded to ``te.run`` only when truthy.
        """
        num_cpus = kwargs.get('num_cpus', None)
        filename = data_source_path('anolis.fasta')

        md = MultiLocusDataset()
        md.read_files(seq_filename_list=[filename], datatype=datatype)
        md.relabel_for_sate()

        te = self.get_tree_estimator(name)
        if te is None:
            # ``warning`` is the supported spelling; ``warn`` is a deprecated
            # alias on standard-library loggers.
            _LOG.warning("test%s skipped" % name)
            return

        # Build the arguments once so the two call variants cannot drift
        # apart; ``num_cpus`` is only passed when the caller supplied one.
        run_kwargs = {
            'alignment': md,
            'partitions': partitions,
            'tmp_dir_par': self.ts.top_level_temp,
            'delete_temps': True,
        }
        if num_cpus:
            run_kwargs['num_cpus'] = num_cpus
        te.run(**run_kwargs)
Exemplo n.º 8
0
def read_input_sequences(seq_filename_list,
                         datatype,
                         missing=None):
    """Return a new ``MultiLocusDataset`` populated from the given files.

    The three arguments are handed to ``MultiLocusDataset.read_files`` as
    the keyword arguments of the same names.
    """
    loaded = MultiLocusDataset()
    reader = loaded.read_files
    reader(seq_filename_list=seq_filename_list,
           datatype=datatype,
           missing=missing)
    return loaded