Пример #1
0
 def test_upsert_tuple(self):
     """
     Upserts one sample whose ids and values are all given as tuples.

     The sample ids, feature ids and value rows are passed as tuples
     instead of lists; the test passes as long as the call does not raise.
     :return:
     """
     conn = celldb.connect(URL)
     sample_ids = ('sample_tuple', )
     feature_ids = ('feature_tuple', )
     vectors = ((0.1, ), )
     celldb.upsert_samples(conn, sample_ids, feature_ids, vectors)
Пример #2
0
 def test_upsert_samples(self):
     """
     Upserts 20 samples with 10 random-valued features each and checks
     that listing samples and features yields exactly the upserted ids.
     :return:
     """
     conn = celldb.connect(URL)
     sample_ids = ["sample_{}".format(n) for n in range(20)]
     feature_ids = ["feature_{}".format(n) for n in range(10)]
     # One row of random values per sample, one value per feature
     vectors = []
     for _ in sample_ids:
         vectors.append([random.random() for _ in feature_ids])
     celldb.upsert_samples(conn, sample_ids, feature_ids, vectors)
     listed_samples = set(celldb.list_samples(conn))
     assert listed_samples == set(sample_ids)
     listed_features = set(celldb.list_features(conn))
     assert listed_features == set(feature_ids)
Пример #3
0
 def test_upsert_100000_wide(self):
     """
     Upserts one sample with 100000 features and checks that the elapsed
     time per sample stays below the allowed limit.
     :return:
     """
     n_samples = 1
     n_features = 100000
     # Upper bound on the upsert time allowed per sample, in seconds
     max_seconds_per_sample = 200
     conn = celldb.connect(URL)
     sample_ids, feature_ids, vectors = _random_dataset(
         n_samples, n_features)
     began = time.time()
     celldb.upsert_samples(conn, sample_ids, feature_ids, vectors)
     elapsed = time.time() - began
     assert elapsed / float(n_samples) < max_seconds_per_sample
Пример #4
0
 def test_single_feature_matrix(self):
     """
     Upserts the dataset produced by ``_random_dataset(1, 4)`` and checks
     that the listed feature ids match the ones that were upserted.

     NOTE(review): despite the test name, no matrix is retrieved here and
     the dataset does not appear to be limited to a single feature --
     confirm whether that is intended.
     :return:
     """
     conn = celldb.connect(URL)
     sample_ids, expected_features, vectors = _random_dataset(1, 4)
     celldb.upsert_samples(conn, sample_ids, expected_features, vectors)
     listed_features = set(celldb.list_features(conn))
     assert listed_features == set(expected_features)
     _drop_tables(conn)
Пример #5
0
 def test_matrix(self):
     """
     Upserts a random dataset and checks that the retrieved matrix
     reproduces it row by row.

     Each row must start with its sample id, followed by the values of the
     corresponding upserted vector in order.
     :return:
     """
     conn = celldb.connect(URL)
     sample_ids, feature_ids, vectors = _random_dataset(4, 4)
     celldb.upsert_samples(conn, sample_ids, feature_ids, vectors)
     matrix = celldb.matrix(conn, sample_ids, feature_ids)
     # The sample id occupies the first column of every row
     expected_width = len(feature_ids) + 1
     for row_index, row in enumerate(matrix):
         assert row[0] == sample_ids[row_index]
         assert len(row) == expected_width
         expected_values = vectors[row_index]
         for col_index, value in enumerate(row[1:]):
             assert value == expected_values[col_index]
     assert len(matrix) == len(sample_ids)
     _drop_tables(conn)
Пример #6
0
    def test_sparse_matrix(self):
        """
        Upserts a random dataset and checks the sample and feature id
        lists of the sparse matrix returned for it.
        """
        conn = celldb.connect(URL)
        sample_ids, feature_ids, vectors = _random_dataset(4, 4)
        celldb.upsert_samples(conn, sample_ids, feature_ids, vectors)
        sparse = celldb.sparse_matrix(conn, sample_ids, feature_ids)
        # The sample and feature id lists sit at the top level of the sparse
        # matrix for convenience; they act as indices into the `values` map.
        assert len(sparse['sample_ids']) == len(sample_ids)
        assert len(sparse['feature_ids']) == len(feature_ids)

        # NOTE(review): both sides of the comparison below are the same
        # expression, so it only proves that the lookups succeed; it does
        # not compare stored values with `vectors` -- confirm the intended
        # check.
        for position, entry in enumerate(sparse['values']):
            row, col = int(position), int(entry)
            assert vectors[row][col] == vectors[row][col]
        _drop_tables(conn)
Пример #7
0
    def test_large_matrix(self):
        """
        Checks that retrieving a large matrix completes within an
        acceptable amount of time per data point.

        A random dataset of ``n_samples`` by ``n_features`` is upserted and
        then read back with ``celldb.matrix``; only the read is timed.

        :return:
        """
        connection = celldb.connect(URL)
        #  The amount of time in seconds per point we're willing to accept
        per_point = 0.01
        n_samples = 10000
        n_features = 40
        points = float(n_samples * n_features)
        # Size the dataset from the same constants used for `points` so the
        # per-point average always matches the data actually requested.
        sampleIds, featureIds, vectors = _random_dataset(
            n_samples, n_features)
        celldb.upsert_samples(connection, sampleIds, featureIds, vectors)
        start = time.time()
        matrix = celldb.matrix(connection, sampleIds, featureIds)
        # Stop the clock before asserting so that only the query is timed.
        end = time.time()
        assert len(matrix) == len(sampleIds)
        assert (end - start) / points < per_point
Пример #8
0
 def test_singleton_matrix(self):
     """
     Upserts the dataset produced by ``_random_dataset(1, 4)`` and checks
     the sample list, the feature list and the retrieved matrix against it.
     :return:
     """
     conn = celldb.connect(URL)
     sample_ids, feature_ids, vectors = _random_dataset(1, 4)
     celldb.upsert_samples(conn, sample_ids, feature_ids, vectors)
     listed_samples = list(celldb.list_samples(conn))
     assert listed_samples[0] == sample_ids[0]
     listed_features = list(celldb.list_features(conn))
     assert len(feature_ids) == len(listed_features)
     matrix = celldb.matrix(conn, sample_ids, feature_ids)
     # Every row carries its sample_id first, then one value per feature
     expected_width = len(feature_ids) + 1
     for row_index, row in enumerate(matrix):
         assert row[0] == sample_ids[row_index]
         assert len(row) == expected_width
         for col_index, value in enumerate(row[1:]):
             assert value == vectors[row_index][col_index]
     _drop_tables(conn)
Пример #9
0
 def test_list_many(self):
     """
     Checks that upserting many samples and then listing them both
     finish within their time limits.
     :return:
     """
     conn = celldb.connect(URL)
     _drop_tables(conn)
     n_samples = 10000
     n_features = 10
     # Time limits in seconds: per upserted sample, and for the listing
     max_per_sample = 1
     max_list_time = 30
     sample_ids, feature_ids, vectors = _random_dataset(
         n_samples, n_features)

     upsert_began = time.time()
     celldb.upsert_samples(conn, sample_ids, feature_ids, vectors)
     upsert_elapsed = time.time() - upsert_began
     assert upsert_elapsed / float(n_samples) < max_per_sample

     list_began = time.time()
     listed = list(celldb.list_samples(conn))
     list_elapsed = time.time() - list_began
     assert list_elapsed < max_list_time
     assert len(listed) == len(sample_ids)