def test_write_to_file(self):
    """Check the annotations CSV after a first PUT and after an overwriting PUT.

    The first PUT must produce a CSV holding exactly the submitted columns,
    indexed like the original obs index. The second PUT must replace the
    contents completely and leave exactly one file in the backup directory.
    """
    row_count = self.data.get_shape()[0]
    ok_response = json.dumps({"status": "OK"})

    def put_and_read_back(labels_by_column):
        # PUT one constant-valued categorical column per entry, then load
        # whatever ended up in the output file.
        payload = make_fbs({
            column: pd.Series([label] * row_count, dtype="category")
            for column, label in labels_by_column.items()
        })
        self.assertEqual(self.annotation_put_fbs(payload), ok_response)
        self.assertTrue(path.exists(self.annotations.output_file))
        return pd.read_csv(
            self.annotations.output_file, index_col=0, header=0, comment="#"
        )

    # first write: the file is written as expected
    first_labels = {"cat_A": "label_A", "cat_B": "label_B"}
    written = put_and_read_back(first_labels)
    self.assertEqual(written.shape, (row_count, 2))
    self.assertEqual(set(written.columns), {"cat_A", "cat_B"})
    self.assertTrue(self.data.original_obs_index.equals(written.index))
    for column, label in first_labels.items():
        self.assertTrue(np.all(written[column] == [label] * row_count))

    # second write: complete overwrite (cat_B gone, cat_C new)
    second_labels = {"cat_A": "label_A1", "cat_C": "label_C"}
    rewritten = put_and_read_back(second_labels)
    self.assertEqual(set(rewritten.columns), {"cat_A", "cat_C"})
    for column, label in second_labels.items():
        self.assertTrue(np.all(rewritten[column] == [label] * row_count))

    # rotation: the backup directory sits next to the output file
    stem = path.splitext(self.annotations.output_file)[0]
    backup_dir = f"{stem}-backups"
    self.assertTrue(path.isdir(backup_dir))
    self.assertEqual(len(listdir(backup_dir)), 1)
def test_error_checks(self):
    """Check that a PUT targeting the "louvain" column raises KeyError."""
    row_count = self.data.get_shape()[0]
    louvain_labels = pd.Series(["undefined"] * row_count, dtype="category")
    rejected_payload = make_fbs({"louvain": louvain_labels})

    # an attempt to overwrite non-writable data must be caught
    with self.assertRaises(KeyError):
        self.annotation_put_fbs(rejected_payload)
def test_category_name_throws_errors_for_categories_that_cant_be_converted_to_filenames(
    self,
):
    """Check that a category named "cat/B" is rejected on PUT.

    The PUT is expected to raise AnnotationCategoryNameError.
    """
    with self.app.test_request_context():
        columns = {
            name: pd.Series([label] * self.n_rows, dtype="category")
            for name, label in (("cat_A", "label_A"), ("cat/B", "label_B"))
        }
        payload = make_fbs(columns)
        with self.assertRaises(AnnotationCategoryNameError):
            self.annotation_put_fbs(payload)
def test_put_user_annotations_obs_fbs(self):
    """PUT one categorical column to annotations/obs and check the reply.

    The response must be an HTTP 200 JSON body of {"status": "OK"}; the
    new column is then checked through the schema and obs-keys helpers.
    """
    url = f"{self.URL_BASE}annotations/obs?annotation-collection-name=test_annotations"
    row_count = self.data.get_shape()[0]
    column = pd.Series(["label_A"] * row_count, dtype="category")
    payload = make_fbs({"cat_A": column})

    response = self.session.put(url, data=payload)

    self.assertEqual(response.status_code, HTTPStatus.OK)
    self.assertEqual(response.headers["Content-Type"], "application/json")
    self.assertEqual(response.json(), {"status": "OK"})
    self._test_get_schema_writable("cat_A")
    self._test_get_user_annotations_obs_keys_fbs("cat_A", {"label_A"})
def test_put_float_data(self):
    """Check how float-typed columns are handled by annotation PUTs.

    A float column with fractional values must be rejected with a
    ValueError carrying a fixed message. A float column holding only whole
    numbers must be accepted and show up in the schema as a writable
    "int32" column.
    """
    n_rows = self.data.get_shape()[0]

    # floating point values with a fractional part must fail
    fbs = make_fbs(
        {"cat_F_FAIL": pd.Series([1.1] * n_rows, dtype=np.dtype("float"))}
    )
    with self.assertRaises(ValueError) as exception_context:
        self.annotation_put_fbs(fbs)
    # The message is checked after the context exits; anything placed after
    # the raising call inside the `with` body would never run.
    self.assertEqual(
        str(exception_context.exception),
        "Columns may not have floating point types",
    )

    # floating point values that can be converted to int must pass
    fbs = make_fbs({"cat_F_PASS": pd.Series([1.0] * n_rows, dtype="float")})
    res = self.annotation_put_fbs(fbs)
    self.assertEqual(res, json.dumps({"status": "OK"}))

    # read the labels back and inspect the resulting matrix and schema
    labels = self.annotations.read_labels(None)
    fbs_all = self.data.annotation_to_fbs_matrix("obs", None, labels)
    schema = schema_get_helper(self.data)
    annotations = decode_fbs.decode_matrix_FBS(fbs_all)
    self.assertEqual(annotations["n_rows"], n_rows)

    all_col_schema = {
        c["name"]: c for c in schema["annotations"]["obs"]["columns"]
    }
    self.assertEqual(
        all_col_schema["cat_F_PASS"],
        {"name": "cat_F_PASS", "type": "int32", "writable": True},
    )
def setUp(self):
    """Build the per-test fixture.

    Creates the data adaptor, temp directory and annotations object via
    data_with_tmp_tiledb_annotations, prepares a two-column categorical
    payload (as a dict, an FBS blob and a DataFrame), and creates a Flask
    app with `auth` attached for request contexts.
    """
    self.user_id = "1234"

    fixture = data_with_tmp_tiledb_annotations(MatrixDataType.H5AD)
    self.data, self.tmp_dir, self.annotations = fixture
    self.data.dataset_config.user_annotations = self.annotations
    self.db = self.annotations.db
    self.n_rows = self.data.get_shape()[0]

    # the same two constant-valued columns in three representations
    self.test_dict = {
        column: pd.Series([label] * self.n_rows, dtype="category")
        for column, label in (("cat_A", "label_A"), ("cat_B", "label_B"))
    }
    self.fbs = make_fbs(self.test_dict)
    self.df = pd.DataFrame(self.test_dict)

    self.app = Flask("fake_app")
    self.app.auth = auth
def test_file_rotation_to_max_9(self):
    """Check that repeated PUTs never leave more than 9 backup files."""
    n_rows = self.data.get_shape()[0]
    fbs = make_fbs({
        "cat_A": pd.Series(["label_A"] * n_rows, dtype="category"),
        "cat_B": pd.Series(["label_B"] * n_rows, dtype="category"),
    })

    # 11 writes, i.e. more than the 9 backups that may be kept
    for _ in range(11):
        res = self.annotation_put_fbs(fbs)
        self.assertEqual(res, json.dumps({"status": "OK"}))

    # the backup directory sits next to the output file
    name = path.splitext(self.annotations.output_file)[0]
    backup_dir = f"{name}-backups"
    self.assertTrue(path.isdir(backup_dir))
    found_files = listdir(backup_dir)
    # assertLessEqual reports the actual count when the cap is exceeded
    self.assertLessEqual(len(found_files), 9)
def test_remove_categories(self):
    """Check that PUTting an empty category set removes the annotations.

    Afterwards the most recent Annotation row for this user and dataset
    must have an empty tiledb_uri, and read_labels must return None.
    """
    with self.app.test_request_context():
        # updating with empty category data is how annotations are removed
        self.annotation_put_fbs(make_fbs({}))

        # the tiledb uri must now be an empty string
        dataset_filter = [CellxGeneDataset.name == self.data.get_location()]
        dataset_id = self.db.query([CellxGeneDataset], dataset_filter)[0].id
        latest = self.db.query_for_most_recent(
            Annotation,
            [
                Annotation.user_id == self.user_id,
                Annotation.dataset_id == str(dataset_id),
            ],
        )
        self.assertEqual(latest.tiledb_uri, "")

        # read_labels must return None
        self.assertIsNone(self.annotations.read_labels(self.data))
def test_put_get_roundtrip(self):
    """Check that columns written by a PUT come back through the GET path.

    After annotation_put_fbs, the matrix from annotation_to_fbs_matrix must
    contain the two new columns with their values, and the schema must list
    both as writable categorical columns.
    """
    row_count = self.data.get_shape()[0]
    user_labels = {"cat_A": "label_A", "cat_B": "label_B"}
    payload = make_fbs({
        column: pd.Series([label] * row_count, dtype="category")
        for column, label in user_labels.items()
    })

    # put
    self.assertEqual(
        self.annotation_put_fbs(payload), json.dumps({"status": "OK"})
    )

    # get
    labels = self.annotations.read_labels(None)
    encoded = self.data.annotation_to_fbs_matrix("obs", None, labels)
    schema = schema_get_helper(self.data)
    decoded = decode_fbs.decode_matrix_FBS(encoded)
    obs_schema = schema["annotations"]["obs"]
    obs_index_col_name = obs_schema["index"]

    self.assertEqual(decoded["n_rows"], row_count)
    self.assertEqual(decoded["n_cols"], 7)
    self.assertIsNone(decoded["row_idx"])
    expected_columns = [
        obs_index_col_name,
        "n_genes",
        "percent_mito",
        "n_counts",
        "louvain",
        "cat_A",
        "cat_B",
    ]
    self.assertEqual(decoded["col_idx"], expected_columns)

    # every row of each new column carries the label that was put
    col_idx = decoded["col_idx"]
    for column, label in user_labels.items():
        self.assertEqual(
            decoded["columns"][col_idx.index(column)], [label] * row_count
        )

    # verify the schema was updated
    schema_by_name = {entry["name"]: entry for entry in obs_schema["columns"]}
    for column, label in user_labels.items():
        self.assertEqual(
            schema_by_name[column],
            {
                "name": column,
                "type": "categorical",
                "categories": [label],
                "writable": True,
            },
        )