def test_upsert_tuple(self):
    """
    Verifies that upsert_samples accepts its ids and vectors as tuples
    instead of lists.

    The test passes as long as the upsert call does not raise.
    :return:
    """
    sample_ids = ('sample_tuple', )
    feature_ids = ('feature_tuple', )
    vectors = ((0.1, ), )
    celldb.upsert_samples(
        celldb.connect(URL), sample_ids, feature_ids, vectors)
def test_upsert_samples(self):
    """
    Upserts 20 samples with 10 random feature values each, then checks
    that the listed sample ids and feature ids equal the ones written.
    :return:
    """
    connection = celldb.connect(URL)
    n_samples, n_features = 20, 10
    sample_ids = ["sample_{}".format(i) for i in range(n_samples)]
    feature_ids = ["feature_{}".format(j) for j in range(n_features)]
    # One row per sample, one random value per feature.
    vectors = [
        [random.random() for _ in feature_ids]
        for _ in sample_ids
    ]
    celldb.upsert_samples(connection, sample_ids, feature_ids, vectors)
    stored_samples = set(celldb.list_samples(connection))
    assert stored_samples == set(sample_ids)
    stored_features = set(celldb.list_features(connection))
    assert stored_features == set(feature_ids)
def test_upsert_100000_wide(self):
    """
    Upserts one sample that is 100000 features wide and checks that the
    wall-clock time spent per sample stays below a fixed bound.
    :return:
    """
    connection = celldb.connect(URL)
    # Upper bound, in seconds, on the upsert time allowed per sample
    max_seconds_per_sample = 200
    sample_count = 1
    feature_count = 100000
    dataset = _random_dataset(sample_count, feature_count)
    sample_ids, feature_ids, vectors = dataset
    started = time.time()
    celldb.upsert_samples(connection, sample_ids, feature_ids, vectors)
    elapsed = time.time() - started
    seconds_per_sample = elapsed / float(sample_count)
    assert seconds_per_sample < max_seconds_per_sample
def test_single_feature_matrix(self):
    """
    Tests to make sure that a dataset with a single feature is upserted
    as expected and that retrieving the resulting matrix doesn't error.

    The tables are dropped in a ``finally`` clause so the cleanup also
    runs when one of the assertions fails.
    :return:
    """
    connection = celldb.connect(URL)
    try:
        # Several samples but exactly one feature, so every matrix row
        # carries a single value column.
        sampleIds, featureIds, vectors = _random_dataset(4, 1)
        celldb.upsert_samples(connection, sampleIds, featureIds, vectors)
        feature_ids = celldb.list_features(connection)
        assert set(feature_ids) == set(featureIds)
        # Retrieving the matrix must not raise for a single feature.
        matrix = celldb.matrix(connection, sampleIds, featureIds)
        assert len(matrix) == len(sampleIds)
        for row in matrix:
            # Each row is the sample id followed by one value per feature.
            assert len(row) == len(featureIds) + 1
    finally:
        _drop_tables(connection)
def test_matrix(self):
    """
    Upserts a 4 x 4 random dataset and checks that the matrix returned
    for the same sample and feature ids reproduces it: one row per
    sample, the sample id in the first position, followed by the values
    in feature order.

    The tables are dropped in a ``finally`` clause so the cleanup also
    runs when one of the assertions fails.
    :return:
    """
    cursor = celldb.connect(URL)
    try:
        sampleIds, featureIds, vectors = _random_dataset(4, 4)
        celldb.upsert_samples(cursor, sampleIds, featureIds, vectors)
        matrix = celldb.matrix(cursor, sampleIds, featureIds)
        # Check the row count before indexing sampleIds/vectors by row
        # position, so a wrong count fails as an assertion rather than
        # as an IndexError inside the loop.
        assert len(matrix) == len(sampleIds)
        for k, row in enumerate(matrix):
            assert row[0] == sampleIds[k]
            # The row has the sample id in the first position
            assert len(row) == len(featureIds) + 1
            for i, value in enumerate(row[1:]):
                assert value == vectors[k][i]
    finally:
        _drop_tables(cursor)
def test_sparse_matrix(self):
    """
    Upserts a 4 x 4 random dataset and checks that the sparse matrix
    returned for it lists as many sample ids and feature ids as were
    requested.

    The tables are dropped in a ``finally`` clause so the cleanup also
    runs when one of the assertions fails.
    :return:
    """
    cursor = celldb.connect(URL)
    try:
        sample_ids, feature_ids, vectors = _random_dataset(4, 4)
        celldb.upsert_samples(cursor, sample_ids, feature_ids, vectors)
        matrix = celldb.sparse_matrix(cursor, sample_ids, feature_ids)
        # The sparse matrix has the list of samples and features at the top
        # level for convenience. These become indices into the `values` map.
        assert len(matrix['sample_ids']) == len(sample_ids)
        assert len(matrix['feature_ids']) == len(feature_ids)
        # NOTE(review): the assertion below compares an entry of `vectors`
        # with itself, so it cannot detect a wrong stored value; it only
        # fails if int(v) raises or the indices are out of range. It
        # should compare against the value held in matrix['values'], but
        # the layout of that map is not visible here -- TODO confirm and
        # tighten.
        for k, v in enumerate(matrix['values']):
            assert vectors[int(k)][int(v)] == vectors[int(k)][int(v)]
    finally:
        _drop_tables(cursor)
def test_large_matrix(self):
    """
    Tests to make sure that when requesting a large matrix we get
    satisfactory results: one row per sample, retrieved within a fixed
    time budget per data point.
    :return:
    """
    connection = celldb.connect(URL)
    # The amount of time in seconds per point we're willing to accept
    per_point = 0.01
    n_samples = 10000
    n_features = 40
    points = float(n_samples * n_features)
    # Build the dataset from the same sizes used for `points`, so the
    # per-point timing stays correct if the sizes are ever changed.
    sampleIds, featureIds, vectors = _random_dataset(n_samples, n_features)
    celldb.upsert_samples(connection, sampleIds, featureIds, vectors)
    start = time.time()
    matrix = celldb.matrix(connection, sampleIds, featureIds)
    # Stop the clock right after the call so only the retrieval is timed.
    end = time.time()
    assert len(matrix) == len(sampleIds)
    assert (end - start) / points < per_point
def test_singleton_matrix(self):
    """
    Tests to make sure a single sample doesn't throw an exception and
    returns the expected results.

    The tables are dropped in a ``finally`` clause so the cleanup also
    runs when one of the assertions fails.
    :return:
    """
    connection = celldb.connect(URL)
    try:
        # One sample with four features.
        sampleIds, featureIds, vectors = _random_dataset(1, 4)
        celldb.upsert_samples(connection, sampleIds, featureIds, vectors)
        sample_ids = celldb.list_samples(connection)
        assert list(sample_ids)[0] == sampleIds[0]
        feature_ids = celldb.list_features(connection)
        assert len(featureIds) == len(list(feature_ids))
        matrix = celldb.matrix(connection, sampleIds, featureIds)
        for k, row in enumerate(matrix):
            assert row[0] == sampleIds[k]
            # The row has the sample_id in the first position
            assert len(row) == len(featureIds) + 1
            for i, value in enumerate(row[1:]):
                assert value == vectors[k][i]
    finally:
        _drop_tables(connection)
def test_list_many(self):
    """
    Checks that upserting 10000 samples and then listing them both
    finish within their time budgets, and that the listing returns as
    many samples as were written.
    :return:
    """
    connection = celldb.connect(URL)
    # Drop any existing tables first; the final length check compares
    # the listing against exactly the samples written here.
    _drop_tables(connection)
    # Time budgets in seconds: for the whole listing, and per upserted sample
    max_list_seconds = 30
    max_seconds_per_sample = 1
    sample_count = 10000
    feature_count = 10
    dataset = _random_dataset(sample_count, feature_count)
    sample_ids, feature_ids, vectors = dataset

    upsert_started = time.time()
    celldb.upsert_samples(connection, sample_ids, feature_ids, vectors)
    upsert_elapsed = time.time() - upsert_started
    assert upsert_elapsed / float(sample_count) < max_seconds_per_sample

    list_started = time.time()
    listed = list(celldb.list_samples(connection))
    list_elapsed = time.time() - list_started
    assert list_elapsed < max_list_seconds
    assert len(listed) == len(sample_ids)