Example #1
0
 def test_reconstruct_counts_unweighted(self):
     """Checks reconstruction with 'unweighted' region normalization.

     Runs reconstruct_counts over the two test regions and verifies the
     dense count matrix plus the sample and observation identifiers.
     """
     # Expected values, stated up front. The matrix is written
     # sample-per-row and transposed to the features-x-samples layout
     # used by the biom matrix; * 1. coerces it to float.
     expected_matrix = np.array([
         [200, 100, 0, 50, 100, 100],
         [200, 50, 200, 25, 50, 50],
         [0, 200, 200, 0, 100, 100],
     ]).T * 1.
     expected_samples = np.array(['sample1', 'sample2', 'sample3'])
     expected_obs = np.array(
         ['seq1', 'seq2', 'seq3', 'seq4', 'seq5', 'seq6'])

     count_table = reconstruct_counts(
         region=['Bludhaven', 'Gotham'],
         regional_alignment=[
             ts.region1_align.view(Delayed).copy(),
             ts.region2_align.view(Delayed).copy(),
         ],
         regional_table=[
             ts.region1_counts.view(biom.Table),
             ts.region2_counts.view(biom.Table),
         ],
         database_map=self.datbase_map['clean_name'],
         database_summary=self.database_summary,
         debug=True,
         min_counts=10,
         min_abund=1e-2,
         region_normalize='unweighted',
     )

     npt.assert_array_equal(count_table.matrix_data.todense(),
                            expected_matrix)
     npt.assert_array_equal(
         np.array(list(count_table.ids(axis='sample'))),
         expected_samples)
     npt.assert_array_equal(
         np.array(list(count_table.ids(axis='observation'))),
         expected_obs)
Example #2
0
 def test_reconstruct_counts_align_drop_samples_warning(self):
     """Checks that a high min_counts threshold warns about dropped samples.

     With min_counts=590 two samples fall below the read threshold; the
     reconstruction should emit a UserWarning naming them as discarded.
     """
     with warnings.catch_warnings(record=True) as caught:
         count_table, summary, mapping = reconstruct_counts(
             region=['Bludhaven', 'Gotham'],
             regional_alignment=[
                 ts.region1_align.view(pd.DataFrame).copy(),
                 ts.region2_align.view(pd.DataFrame).copy(),
             ],
             kmer_map=[
                 ts.region1_db_map.view(pd.DataFrame).copy(),
                 ts.region2_db_map.view(pd.DataFrame).copy(),
             ],
             regional_table=[
                 ts.region1_counts.view(biom.Table).copy(),
                 ts.region2_counts.view(biom.Table).copy(),
             ],
             debug=True,
             min_counts=590,
             min_abund=1e-2,
         )
     # Only the most recent warning is checked.
     last = caught[-1]
     self.assertTrue(issubclass(last.category, UserWarning))
     self.assertEqual(
         str(last.message),
         'There are 2 samples with fewer than 590 '
         'total reads. These samples will be discarded.')
Example #3
0
 def test_reconstruct_counts_align_drop_samples_error(self):
     """Checks the error raised when every sample is below min_counts.

     With the default minimum count threshold no test sample qualifies,
     so reconstruct_counts must raise a ValueError explaining that no
     sample meets the 1000-sequence requirement.
     """
     with self.assertRaises(ValueError) as ctx:
         count_table, summary, mapping = reconstruct_counts(
             region=['Bludhaven', 'Gotham'],
             regional_alignment=[
                 ts.region1_align.view(pd.DataFrame).copy(),
                 ts.region2_align.view(pd.DataFrame).copy(),
             ],
             kmer_map=[
                 ts.region1_db_map.view(pd.DataFrame).copy(),
                 ts.region2_db_map.view(pd.DataFrame).copy(),
             ],
             regional_table=[
                 ts.region1_counts.view(biom.Table).copy(),
                 ts.region2_counts.view(biom.Table).copy(),
             ],
             debug=True,
             min_abund=1e-2,
         )
     self.assertEqual(
         str(ctx.exception),
         'None of the samples have more than the 1000 total '
         'sequences required for reconstruction.')
Example #4
0
 def test_reconstruct_counts_align_drop_samples_error(self):
     """Checks the error raised when samples fall below min_counts=590.

     Two samples have fewer than 590 total sequences; this API variant
     of reconstruct_counts raises a ValueError rather than warning.
     """
     with self.assertRaises(ValueError) as ctx:
         # The return value is irrelevant: the call must raise.
         reconstruct_counts(
             region=['Bludhaven', 'Gotham'],
             regional_alignment=[
                 ts.region1_align.view(pd.DataFrame).copy(),
                 ts.region2_align.view(pd.DataFrame).copy(),
             ],
             regional_table=[
                 ts.region1_counts.view(biom.Table),
                 ts.region2_counts.view(biom.Table),
             ],
             database_map=self.datbase_map['clean_name'],
             database_summary=self.database_summary,
             debug=True,
             min_counts=590,
             min_abund=1e-2,
         )
     self.assertEqual(
         str(ctx.exception),
         'There are 2 samples with fewer than 590 total sequences. '
         'Please check your minimum counts and make sure your '
         'representative sequences are aligned with the database.')
Example #5
0
 def test_reconstruct_counts_same_twice(self):
     """Checks reconstruction when the same region is supplied twice.

     Both regional alignments and kmer maps come from region 1, so the
     duplicated region should collapse: seq1/seq2 merge into 'seq1|seq2'
     and every feature reports a single region in the summary.
     NOTE(review): only one regional_table is passed for the two
     alignments — presumably reconstruct_counts tolerates this for
     duplicated regions; confirm against the implementation.
     """
     # Expected per-sequence database map after merging seq1 and seq2.
     known_map = pd.DataFrame(
         data=[['seq1|seq2', 'WANTCAT', 'WANTCAT', 15],
               ['seq1|seq2', 'WANTCAT', 'WANTCAT', 15],
               ['seq3', 'WANTCAT', 'WANTCAT', 15],
               ['seq5', 'WANTCAT', 'WANTCAT', 15],
               ['seq6', 'WANTCAT', 'WANTCAT', 15]],
         index=pd.Index(['seq1', 'seq2', 'seq3', 'seq5', 'seq6'], 
                         name='db-seq'),
         columns=['clean_name', 'first-fwd-primer', 'last-fwd-primer', 
                  'last-kmer-length'],
         )
     # Expected reconstruction summary. The seq1|seq2 entry uses float
     # literals while the others use ints — dtype-sensitive for
     # assert_frame_equal, so the mix is deliberate; do not "clean up".
     known_summary = pd.DataFrame.from_dict(orient='index', data={
         'seq1|seq2': {'num-regions': 1., 
                       'total-kmers-mapped': 2., 
                       'mean-kmer-per-region': 2.,
                       'stdv-kmer-per-region': 0.,
                       'mapped-asvs': 'asv01'
                 },
         'seq3': {'num-regions': 1, 
                  'total-kmers-mapped': 2, 
                  'mean-kmer-per-region': 2,
                  'stdv-kmer-per-region': 0,
                  'mapped-asvs': 'asv02|asv03'
                 },
         'seq5': {'num-regions': 1, 
                  'total-kmers-mapped': 1, 
                  'mean-kmer-per-region': 1,
                  'stdv-kmer-per-region': 0,
                  'mapped-asvs': 'asv04|asv05',
                 },
         'seq6': {'num-regions': 1, 
                  'total-kmers-mapped': 1, 
                  'mean-kmer-per-region': 1,
                  'stdv-kmer-per-region': 0,
                  'mapped-asvs': 'asv04|asv05',
                 },
         })
     known_summary.index.set_names('feature-id', inplace=True)
     # Same region name and region-1 inputs passed twice on purpose.
     count_table, summary, mapping =reconstruct_counts(
         region=['Bludhaven', 'Bludhaven'],
         regional_alignment=[ts.region1_align.view(pd.DataFrame).copy(), 
                             ts.region1_align.view(pd.DataFrame).copy()],
         kmer_map=[ts.region1_db_map.view(pd.DataFrame).copy(), 
                   ts.region1_db_map.view(pd.DataFrame).copy()],
         regional_table=[ts.region1_counts.view(biom.Table).copy()],
         debug=True, 
         min_counts=10,
         min_abund=1e-2, 
         region_normalize='unweighted'
         )
     npt.assert_array_equal(
         np.array(list(count_table.ids(axis='sample'))),
         np.array(['sample1', 'sample2', 'sample3'])
     )
     npt.assert_array_equal(
         np.array(list(count_table.ids(axis='observation'))),
         np.array(['seq1|seq2', 'seq3', 'seq5', 'seq6']),
     )
     # Matrix is written sample-per-row, then transposed to the
     # features-x-samples biom layout; * 1. makes it float.
     npt.assert_array_equal(
         count_table.matrix_data.todense(),
         np.array([[150,   0,  50, 50],
                   [125, 100,  25, 25],
                   [100, 100,  50, 50]]).T * 1.,
        )
     pdt.assert_frame_equal(mapping, known_map)
     pdt.assert_frame_equal(summary.to_dataframe(), 
                            known_summary)
Example #6
0
    def test_reconstruct_counts(self):
        """Checks the full two-region reconstruction end to end.

        Verifies the reconstructed count matrix, the sample and
        observation identifiers, the per-sequence database map, and the
        per-feature reconstruction summary for the Bludhaven/Gotham
        test regions.
        """
        # Expected per-sequence database map; here every sequence keeps
        # its own clean_name (no merging, unlike the duplicated-region
        # case).
        known_map = pd.DataFrame(
            data=[['seq1', 'WANTCAT', 'CACCTCGTN', 15],
                  ['seq2', 'WANTCAT', 'CACCTCGTN', 15],
                  ['seq3', 'WANTCAT', 'CACCTCGTN', 15],
                  ['seq4', 'CACCTCGTN', 'CACCTCGTN', 15],
                  ['seq5', 'WANTCAT', 'CACCTCGTN', 15],
                  ['seq6', 'WANTCAT', 'CACCTCGTN', 15]],
            index=pd.Index(['seq1', 'seq2', 'seq3', 'seq4', 'seq5', 'seq6'], 
                            name='db-seq'),
            columns=['clean_name', 'first-fwd-primer', 'last-fwd-primer', 
                     'last-kmer-length'],
            )

        # Expected reconstruction summary. seq1 uses float literals and
        # the rest use ints — dtype-sensitive for assert_frame_equal, so
        # the mix is deliberate; do not "clean up".
        known_summary = pd.DataFrame.from_dict(orient='index', data={
            'seq1': {'num-regions': 2., 
                     'total-kmers-mapped': 2., 
                     'mean-kmer-per-region': 1.,
                     'stdv-kmer-per-region': 0.,
                     'mapped-asvs': 'asv01|asv06'
                    },
            'seq2': {'num-regions': 2, 
                     'total-kmers-mapped': 2, 
                     'mean-kmer-per-region': 1,
                    'stdv-kmer-per-region': 0,
                    'mapped-asvs': 'asv01|asv07',
                    },
            'seq3': {'num-regions': 2, 
                     'total-kmers-mapped': 3, 
                     'mean-kmer-per-region': 1.5,
                     # Sample standard deviation of the per-region kmer
                     # counts (1 and 2), hence ddof=1.
                     'stdv-kmer-per-region': np.std([1, 2], ddof=1),
                     'mapped-asvs': 'asv02|asv03|asv08'
                    },
            'seq4': {'num-regions': 1, 
                     'total-kmers-mapped': 1, 
                     'mean-kmer-per-region': 1,
                     'stdv-kmer-per-region': 0,
                     'mapped-asvs': 'asv09'
                    },
            'seq5': {'num-regions': 2, 
                     'total-kmers-mapped': 2, 
                     'mean-kmer-per-region': 1,
                     'stdv-kmer-per-region': 0,
                     'mapped-asvs': 'asv04|asv05|asv10',
                    },
            'seq6': {'num-regions': 2, 
                     'total-kmers-mapped': 2, 
                     'mean-kmer-per-region': 1,
                     'stdv-kmer-per-region': 0,
                     'mapped-asvs': 'asv04|asv05|asv11',
                    },
            })
        known_summary.index.set_names('feature-id', inplace=True)
        
        
        count_table, summary, mapping = reconstruct_counts(
              region=['Bludhaven', 'Gotham'],
              regional_alignment=[ts.region1_align.view(pd.DataFrame).copy(), 
                                  ts.region2_align.view(pd.DataFrame).copy()],
              kmer_map=[ts.region1_db_map.view(pd.DataFrame).copy(), 
                        ts.region2_db_map.view(pd.DataFrame).copy()],
              regional_table=[ts.region1_counts.view(biom.Table),
                              ts.region2_counts.view(biom.Table)],
              debug=True, 
              min_counts=10,
              min_abund=1e-2)
        # Matrix is written sample-per-row, then transposed to the
        # features-x-samples biom layout.
        npt.assert_array_equal(
            np.array(count_table.matrix_data.todense()),
            np.array([[100,  50,   0,  50,  50, 50],
                      [100,  25, 100,  25,  25, 25],
                      [  0, 100, 100,   0,  50, 50]]).T
           )
        npt.assert_array_equal(
            np.array(list(count_table.ids(axis='sample'))),
            np.array(['sample1', 'sample2', 'sample3'])
        )
        npt.assert_array_equal(
            np.array(list(count_table.ids(axis='observation'))),
            np.array(['seq1', 'seq2', 'seq3', 'seq4', 'seq5', 'seq6']),
        )
        pdt.assert_frame_equal(known_map, mapping)
        pdt.assert_frame_equal(known_summary, summary.to_dataframe())