def test_summarize_lengths(self):
    """Compare _summarize_lengths output against hand-computed statistics.

    Two samples are summarized: 'a' with lengths [1, 2, 3] and 'b' with
    lengths [3, 4]. The call yields a per-sample mapping and a single
    summary over all lengths; each is checked with ``self._stat_equal``.
    """
    lens = {'a': [1, 2, 3], 'b': [3, 4]}

    # Expected per-sample summaries (10 histogram counts, 11 bin edges each).
    exp_samp = {
        'a': stat(min=1, max=3, std=.81649658092772603, mean=2.0,
                  median=2.0, n=3,
                  hist=np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 1]),
                  hist_edge=np.array(
                      [1., 1.2, 1.4, 1.6, 1.8, 2, 2.2, 2.4, 2.6, 2.8, 3.])),
        'b': stat(min=3, max=4, std=0.5, mean=3.5, median=3.5, n=2,
                  hist=np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
                  hist_edge=np.array(
                      [3., 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.])),
    }

    # Expected summary over the five lengths pooled from both samples.
    exp_full = stat(min=1, max=4, mean=2.6, median=3.0,
                    std=1.019803902718557,
                    hist=np.array([1, 0, 0, 1, 0, 0, 2, 0, 0, 1]), n=5,
                    hist_edge=np.array(
                        [1., 1.3, 1.6, 1.9, 2.2, 2.5, 2.8, 3.1, 3.4, 3.7, 4.]))

    obs_samp, obs_full = _summarize_lengths(lens)

    self.assertEqual(len(obs_samp), 2)
    for sample_id in obs_samp:
        self._stat_equal(obs_samp[sample_id], exp_samp[sample_id])
    self._stat_equal(obs_full, exp_full)
def test_construct_datasets(self):
    """Verify _construct_datasets lays out the expected HDF5 structure.

    Sample statistics for two samples ('a' with 3 lengths, 'b' with 2) are
    passed to ``_construct_datasets`` together with ``self.hdf5_file``.
    The file must then contain exactly two top-level keys, and for each
    sample the sequence, qual and barcode datasets plus an ``n`` attribute
    equal to the number of lengths supplied for that sample.
    """
    lens = {'a': [1, 2, 3], 'b': [3, 4]}
    sample_stats, _ = _summarize_lengths(lens)
    _construct_datasets(sample_stats, self.hdf5_file)

    self.assertEqual(len(self.hdf5_file.keys()), 2)

    # Paths expected beneath every sample group.
    dataset_suffixes = ('sequence', 'qual', 'barcode/corrected',
                        'barcode/original', 'barcode/error')

    for sample_id, expected_n in (('a', 3), ('b', 2)):
        self.assertIn(sample_id, self.hdf5_file)
        for suffix in dataset_suffixes:
            self.assertIn('%s/%s' % (sample_id, suffix), self.hdf5_file)
        # assertEqual, not assertTrue: assertTrue(x, 3) treats 3 as the
        # failure message and never compares the stored count.
        self.assertEqual(self.hdf5_file[sample_id].attrs['n'], expected_n)
def test_summarize_lengths(self):
    """Check per-sample and overall summaries produced by _summarize_lengths.

    The input maps sample 'a' to lengths [1, 2, 3] and sample 'b' to
    lengths [3, 4]. Observed results are compared to the expected ``stat``
    records via ``self._stat_equal``.
    """
    lens = {'a': [1, 2, 3], 'b': [3, 4]}

    expected_a = stat(
        min=1, max=3, std=.81649658092772603, mean=2.0, median=2.0, n=3,
        hist=np.array([1, 0, 0, 0, 0, 1, 0, 0, 0, 1]),
        hist_edge=np.array([1., 1.2, 1.4, 1.6, 1.8, 2,
                            2.2, 2.4, 2.6, 2.8, 3.]))
    expected_b = stat(
        min=3, max=4, std=0.5, mean=3.5, median=3.5, n=2,
        hist=np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
        hist_edge=np.array([3., 3.1, 3.2, 3.3, 3.4, 3.5,
                            3.6, 3.7, 3.8, 3.9, 4.]))
    # Summary over all five lengths from both samples combined.
    expected_all = stat(
        min=1, max=4, mean=2.6, median=3.0, std=1.019803902718557,
        hist=np.array([1, 0, 0, 1, 0, 0, 2, 0, 0, 1]), n=5,
        hist_edge=np.array([1., 1.3, 1.6, 1.9, 2.2, 2.5,
                            2.8, 3.1, 3.4, 3.7, 4.]))
    exp = ({'a': expected_a, 'b': expected_b}, expected_all)

    obs_samp, obs_full = _summarize_lengths(lens)
    exp_samp, exp_full = exp

    self.assertEqual(len(obs_samp), 2)
    for key in obs_samp:
        observed, expected = obs_samp[key], exp_samp[key]
        self._stat_equal(observed, expected)
    self._stat_equal(obs_full, exp_full)