def test_fasta_from_sqlite_same_clusters_different_rep_seq(self):
        # Mirror of `test_fasta_from_sqlite`: the cluster membership is the
        # same, but the feature counts inside each cluster are swapped so
        # that the other member of every cluster is expected as its rep seq.
        db = sqlite3.connect(':memory:')
        cursor = db.cursor()
        schema = (
            'CREATE TABLE feature_cluster_map'
            '(feature_id TEXT PRIMARY KEY,cluster_id TEXT NOT NULL, '
            'count INTEGER);'
        )
        cursor.execute(schema)
        # (feature_id, cluster_id, count) rows, one per feature.
        rows = [
            ('feature1', 'r1', 15),
            ('feature2', 'r2', 24),
            ('feature3', 'r1', 204),
            ('feature4', 'r2', 4),
        ]
        cursor.executemany(
            'INSERT INTO feature_cluster_map VALUES (?, ?, ?);', rows)
        db.commit()

        with tempfile.NamedTemporaryFile() as out_f:
            out_fp = out_f.name
            _fasta_from_sqlite(db, self.input_sequences_fp, out_fp)

            # Read the output back while the temporary file still exists.
            observed = _read_seqs(out_fp)

        all_seqs = _read_seqs(self.input_sequences)
        # Index 1 is feature2 and index 2 is feature3.
        expected = [all_seqs[idx] for idx in (1, 2)]
        _relabel_seqs(expected, ['r2', 'r1'])
        self.assertEqual(observed, expected)
Пример #2
0
    def test_clusters_with_multiple_features_with_same_count(self):
        # Cluster r1 holds feature1 and feature3; cluster r2 holds feature2
        # and feature4. Both members of a cluster carry the same count, so
        # this test checks which rep seq is picked when counts are tied.
        # The query in _fasta_from_sqlite is expected to break the tie by
        # taking the first feature when the tied features are sorted
        # alphabetically by id.
        db = sqlite3.connect(':memory:')
        cursor = db.cursor()
        schema = (
            'CREATE TABLE feature_cluster_map'
            '(feature_id TEXT PRIMARY KEY,cluster_id TEXT NOT NULL, '
            'count INTEGER);'
        )
        cursor.execute(schema)
        # (feature_id, cluster_id, count) rows, one per feature.
        rows = [
            ('feature1', 'r1', 204),
            ('feature2', 'r2', 4),
            ('feature3', 'r1', 204),
            ('feature4', 'r2', 4),
        ]
        cursor.executemany(
            'INSERT INTO feature_cluster_map VALUES (?, ?, ?);', rows)
        db.commit()

        with tempfile.NamedTemporaryFile() as out_f:
            out_fp = out_f.name
            _fasta_from_sqlite(db, self.input_sequences_fp, out_fp)

            # Read the output back while the temporary file still exists.
            observed = _read_seqs(out_fp)

        all_seqs = _read_seqs(self.input_sequences)
        # Index 0 is feature1 and index 1 is feature2.
        expected = [all_seqs[idx] for idx in (0, 1)]
        _relabel_seqs(expected, ['r1', 'r2'])
        self.assertEqual(observed, expected)
    def test_fasta_from_sqlite(self):
        # Hand-built clustering: feature1 and feature3 are assigned to r1,
        # feature2 and feature4 are assigned to r2.
        db = sqlite3.connect(':memory:')
        cursor = db.cursor()
        schema = (
            'CREATE TABLE feature_cluster_map'
            '(feature_id TEXT PRIMARY KEY,cluster_id TEXT NOT NULL, '
            'count INTEGER);'
        )
        cursor.execute(schema)
        # (feature_id, cluster_id, count) rows, one per feature.
        rows = [
            ('feature1', 'r1', 204),
            ('feature2', 'r2', 4),
            ('feature3', 'r1', 15),
            ('feature4', 'r2', 24),
        ]
        cursor.executemany(
            'INSERT INTO feature_cluster_map VALUES (?, ?, ?);', rows)
        db.commit()

        with tempfile.NamedTemporaryFile() as out_f:
            out_fp = out_f.name
            _fasta_from_sqlite(db, self.input_sequences_fp, out_fp)

            # Read the output back while the temporary file still exists.
            observed = _read_seqs(out_fp)

        all_seqs = _read_seqs(self.input_sequences)
        # Index 0 is feature1 and index 3 is feature4.
        expected = [all_seqs[idx] for idx in (0, 3)]
        _relabel_seqs(expected, ['r1', 'r2'])
        self.assertEqual(observed, expected)