class DataLoadEngineTest(unittest.TestCase):
    """
    Test file loading, including deferred loading/update.
    """
    def setUp(self):
        self.data_file = DataLocator("example-dataset/pbmc3k.h5ad")
        self.data = ScanpyEngine()

    def test_init(self):
        self.assertIsNone(self.data.data)

    def test_delayed_load_args(self):
        args = {
            "layout": ["tsne"],
            "max_category_items": 1000,
            "obs_names": "foo",
            "var_names": "bar",
            "diffexp_lfc_cutoff": 0.1,
            "annotations": False,
            "annotations_file": None,
            "annotations_output_dir": None,
            "backed": False,
            "diffexp_may_be_slow": False,
            "disable_diffexp": False
        }
        self.data.update(args=args)
        self.assertEqual(args, self.data.config)

    def test_requires_data(self):
        with self.assertRaises(DriverError):
            self.data._create_schema()

    def test_delayed_load_data(self):
        self.data.update(data_locator=self.data_file)
        self.data._create_schema()
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        epsilon = 0.000_005
        self.assertTrue(self.data.data.X[0, 0] - -0.171_469_51 < epsilon)

    def test_diffexp_topN(self):
        self.data.update(data_locator=self.data_file)
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}
        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)
        result = json.loads(
            self.data.diffexp_topN(f1["filter"], f2["filter"], 20))
        self.assertEqual(len(result), 20)
Пример #2
0
class DataLoadEngineTest(unittest.TestCase):
    def setUp(self):
        self.data_file = "example-dataset/pbmc3k.h5ad"
        self.data = ScanpyEngine()

    def test_init(self):
        self.assertIsNone(self.data.data)

    def test_delayed_load_args(self):
        args = {
            "layout": ["tsne"],
            "max_category_items": 1000,
            "obs_names": "foo",
            "var_names": "bar",
            "diffexp_lfc_cutoff": 0.1,
        }
        self.data.update(args=args)
        self.assertEqual(args, self.data.config)

    def test_requires_data(self):
        with self.assertRaises(DriverError):
            self.data._create_schema()

    def test_delayed_load_data(self):
        self.data.update(data=self.data_file)
        self.data._create_schema()
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        epsilon = 0.000_005
        self.assertTrue(self.data.data.X[0, 0] - -0.171_469_51 < epsilon)

    def test_diffexp_topN(self):
        self.data.update(data=self.data_file)
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}
        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)
        result = json.loads(
            self.data.diffexp_topN(f1["filter"], f2["filter"], 20))
        self.assertEqual(len(result), 20)

    if __name__ == "__main__":
        unittest.main()
Пример #3
0
class EngineTest(unittest.TestCase):
    def setUp(self):
        args = {
            "layout": ["umap"],
            "max_category_items": 100,
            "obs_names": None,
            "var_names": None,
            "diffexp_lfc_cutoff": 0.01,
            "layout_file": None,
            "backed": self.backed
        }
        self.data = ScanpyEngine(DataLocator(self.data_locator), args)

    def test_init(self):
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        epsilon = 0.000_005
        self.assertTrue(self.data.data.X[0, 0] - -0.171_469_51 < epsilon)

    def test_mandatory_annotations(self):
        obs_index_col_name = self.data.get_schema(
        )["annotations"]["obs"]["index"]
        self.assertIn(obs_index_col_name, self.data.data.obs)
        self.assertEqual(list(self.data.data.obs.index), list(range(2638)))
        var_index_col_name = self.data.get_schema(
        )["annotations"]["var"]["index"]
        self.assertIn(var_index_col_name, self.data.data.var)
        self.assertEqual(list(self.data.data.var.index), list(range(1838)))

    @pytest.mark.filterwarnings("ignore:Scanpy data matrix")
    def test_data_type(self):
        # don't run the test on the more exotic data types, as they don't
        # support the astype() interface (used by this test, but not underlying app)
        if isinstance(self.data.data.X, np.ndarray):
            self.data.data.X = self.data.data.X.astype("float64")
            with self.assertWarns(UserWarning):
                self.data._validate_data_types()

    def test_filter_idx(self):
        filter_ = {"filter": {"var": {"index": [1, 99, [200, 300]]}}}
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 102)

    def test_filter_complex(self):
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": "n_cells",
                        "min": 10
                    }],
                    "index": [1, 99, [200, 300]]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 91)

    def test_obs_and_var_names(self):
        self.assertEqual(
            np.sum(self.data.data.var[self.data.get_schema()["annotations"]
                                      ["var"]["index"]].isna()), 0)
        self.assertEqual(
            np.sum(self.data.data.obs[self.data.get_schema()["annotations"]
                                      ["obs"]["index"]].isna()), 0)

    def test_get_schema(self):
        with open(path.join(path.dirname(__file__), "schema.json")) as fh:
            schema = json.load(fh)
            self.assertEqual(self.data.get_schema(), schema)

    def test_schema_produces_error(self):
        self.data.data.obs["time"] = pd.Series(
            list([time.time() for i in range(self.data.cell_count)]),
            dtype="datetime64[ns]",
        )
        with pytest.raises(TypeError):
            self.data._create_schema()

    def test_config(self):
        self.assertEqual(
            self.data.features["layout"]["obs"],
            {
                "available": True,
                "interactiveLimit": 50000
            },
        )

    def test_layout(self):
        fbs = self.data.layout_to_fbs_matrix()
        layout = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(layout["n_cols"], 2)
        self.assertEqual(layout["n_rows"], 2638)

        X = layout["columns"][0]
        self.assertTrue((X >= 0).all() and (X <= 1).all())
        Y = layout["columns"][1]
        self.assertTrue((Y >= 0).all() and (Y <= 1).all())

    def test_annotations(self):
        fbs = self.data.annotation_to_fbs_matrix("obs")
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 2638)
        self.assertEqual(annotations["n_cols"], 5)
        obs_index_col_name = self.data.get_schema(
        )["annotations"]["obs"]["index"]
        self.assertEqual(
            annotations["col_idx"],
            [
                obs_index_col_name, "n_genes", "percent_mito", "n_counts",
                "louvain"
            ],
        )

        fbs = self.data.annotation_to_fbs_matrix("var")
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations['n_rows'], 1838)
        self.assertEqual(annotations['n_cols'], 2)
        var_index_col_name = self.data.get_schema(
        )["annotations"]["var"]["index"]
        self.assertEqual(annotations["col_idx"],
                         [var_index_col_name, "n_cells"])

    def test_annotation_fields(self):
        fbs = self.data.annotation_to_fbs_matrix("obs",
                                                 ["n_genes", "n_counts"])
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations["n_rows"], 2638)
        self.assertEqual(annotations['n_cols'], 2)

        var_index_col_name = self.data.get_schema(
        )["annotations"]["var"]["index"]
        fbs = self.data.annotation_to_fbs_matrix("var", [var_index_col_name])
        annotations = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(annotations['n_rows'], 1838)
        self.assertEqual(annotations['n_cols'], 1)

    def test_annotation_put(self):
        with self.assertRaises(DisabledFeatureError):
            self.data.annotation_put_fbs(None, "obs")

    def test_diffexp_topN(self):
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}
        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)
        result = json.loads(
            self.data.diffexp_topN(f1["filter"], f2["filter"], 20))
        self.assertEqual(len(result), 20)

    def test_data_frame(self):
        f1 = {"var": {"index": [[0, 10]]}}
        fbs = self.data.data_frame_to_fbs_matrix(f1, "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 10)

        with self.assertRaises(ValueError):
            self.data.data_frame_to_fbs_matrix(None, "obs")

    def test_filtered_data_frame(self):
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": "n_cells",
                        "min": 100
                    }]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 1040)

        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                }
            }
        }
        with self.assertRaises(FilterError):
            self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")

    def test_data_named_gene(self):
        var_index_col_name = self.data.get_schema(
        )["annotations"]["var"]["index"]
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": var_index_col_name,
                        "values": ["RER1"]
                    }]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 1)
        self.assertEqual(data["col_idx"], [4])

        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": var_index_col_name,
                        "values": ["SPEN", "TYMP", "PRMT2"]
                    }]
                }
            }
        }
        fbs = self.data.data_frame_to_fbs_matrix(filter_["filter"], "var")
        data = decode_fbs.decode_matrix_FBS(fbs)
        self.assertEqual(data["n_rows"], 2638)
        self.assertEqual(data["n_cols"], 3)
        self.assertTrue((data["col_idx"] == [15, 1818, 1837]).all())
Пример #4
0
class UtilTest(unittest.TestCase):
    def setUp(self):
        args = {
            "layout": "umap",
            "diffexp": "ttest",
            "max_category_items": 100,
            "obs_names": None,
            "var_names": None,
            "diffexp_lfc_cutoff": 0.01,
        }

        self.data = ScanpyEngine("example-dataset/pbmc3k.h5ad", args)
        self.data._create_schema()

    def test_init(self):
        self.assertEqual(self.data.cell_count, 2638)
        self.assertEqual(self.data.gene_count, 1838)
        epsilon = 0.000_005
        self.assertTrue(self.data.data.X[0, 0] - -0.171_469_51 < epsilon)

    def test_mandatory_annotations(self):
        self.assertIn("name", self.data.data.obs)
        self.assertEqual(list(self.data.data.obs.index), list(range(2638)))
        self.assertIn("name", self.data.data.var)
        self.assertEqual(list(self.data.data.var.index), list(range(1838)))

    @pytest.mark.filterwarnings("ignore:Scanpy data matrix")
    def test_data_type(self):
        self.data.data.X = self.data.data.X.astype("float64")
        with self.assertWarns(UserWarning):
            self.data._validate_data_types()

    def test_filter_idx(self):
        filter_ = {
            "filter": {
                "var": {
                    "index": [1, 99, [200, 300]]
                },
                "obs": {
                    "index": [1, 99, [1000, 2000]]
                },
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape, (1002, 102))

    def test_filter_annotation(self):
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "louvain",
                        "values": ["NK cells", "CD8 T cells"]
                    }]
                }
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape, (470, 1838))
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                }
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape, (497, 1838))

    def test_filter_annotation_no_uns(self):
        filter_ = {
            "filter": {
                "var": {
                    "annotation_value": [{
                        "name": "name",
                        "values": ["RER1"]
                    }]
                }
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape[1], 1)

    def test_filter_complex(self):
        filter_ = {
            "filter": {
                "var": {
                    "index": [1, 99, [200, 300]]
                },
                "obs": {
                    "annotation_value": [
                        {
                            "name": "louvain",
                            "values": ["NK cells", "CD8 T cells"]
                        },
                        {
                            "name": "n_counts",
                            "min": 3000
                        },
                    ],
                    "index": [1, 99, [1000, 2000]],
                },
            }
        }
        data = self.data.filter_dataframe(filter_["filter"])
        self.assertEqual(data.shape, (15, 102))

    def test_obs_and_var_names(self):
        self.assertEqual(np.sum(self.data.data.var["name"].isna()), 0)
        self.assertEqual(np.sum(self.data.data.obs["name"].isna()), 0)

    def test_schema(self):
        with open(path.join(path.dirname(__file__), "schema.json")) as fh:
            schema = json.load(fh)
            self.assertEqual(self.data.schema, schema)

    def test_schema_produces_error(self):
        self.data.data.obs["time"] = Series(
            list([time.time() for i in range(self.data.cell_count)]),
            dtype="datetime64[ns]",
        )
        with pytest.raises(TypeError):
            self.data._create_schema()

    def test_config(self):
        self.assertEqual(
            self.data.features["layout"]["obs"],
            {
                "available": True,
                "interactiveLimit": 50000
            },
        )

    def test_layout(self):
        layout = json.loads(self.data.layout(None))
        self.assertEqual(layout["layout"]["ndims"], 2)
        self.assertEqual(len(layout["layout"]["coordinates"]), 2638)
        self.assertEqual(layout["layout"]["coordinates"][0][0], 0)
        for idx, val in enumerate(layout["layout"]["coordinates"]):
            self.assertLessEqual(val[1], 1)
            self.assertLessEqual(val[2], 1)

    def test_annotations(self):
        annotations = json.loads(self.data.annotation(None, "obs"))
        self.assertEqual(
            annotations["names"],
            ["name", "n_genes", "percent_mito", "n_counts", "louvain"],
        )
        self.assertEqual(len(annotations["data"]), 2638)
        annotations = json.loads(self.data.annotation(None, "var"))
        self.assertEqual(annotations["names"], ["name", "n_cells"])
        self.assertEqual(len(annotations["data"]), 1838)

    def test_annotation_fields(self):
        annotations = json.loads(
            self.data.annotation(None, "obs", ["n_genes", "n_counts"]))
        self.assertEqual(annotations["names"], ["n_genes", "n_counts"])
        self.assertEqual(len(annotations["data"]), 2638)
        annotations = json.loads(self.data.annotation(None, "var", ["name"]))
        self.assertEqual(annotations["names"], ["name"])
        self.assertEqual(len(annotations["data"]), 1838)

    def test_filtered_annotation(self):
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                },
                "var": {
                    "annotation_value": [{
                        "name": "name",
                        "values": ["ATAD3C", "RER1"]
                    }]
                },
            }
        }
        annotations = json.loads(self.data.annotation(filter_["filter"],
                                                      "obs"))
        self.assertEqual(
            annotations["names"],
            ["name", "n_genes", "percent_mito", "n_counts", "louvain"],
        )
        self.assertEqual(len(annotations["data"]), 497)
        annotations = json.loads(self.data.annotation(filter_["filter"],
                                                      "var"))
        self.assertEqual(annotations["names"], ["name", "n_cells"])
        self.assertEqual(len(annotations["data"]), 2)

    def test_filtered_layout(self):
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                }
            }
        }
        layout = json.loads(self.data.layout(filter_["filter"]))
        self.assertEqual(len(layout["layout"]["coordinates"]), 497)

    def test_diffexp_topN(self):
        f1 = {"filter": {"obs": {"index": [[0, 500]]}}}
        f2 = {"filter": {"obs": {"index": [[500, 1000]]}}}
        result = json.loads(self.data.diffexp_topN(f1["filter"], f2["filter"]))
        self.assertEqual(len(result), 10)
        result = json.loads(
            self.data.diffexp_topN(f1["filter"], f2["filter"], 20))
        self.assertEqual(len(result), 20)

    def test_data_frame(self):
        data_frame_obs = json.loads(self.data.data_frame(None, "obs"))
        self.assertEqual(len(data_frame_obs["var"]), 1838)
        self.assertEqual(len(data_frame_obs["obs"]), 2638)
        data_frame_var = json.loads(self.data.data_frame(None, "var"))
        self.assertEqual(len(data_frame_var["var"]), 1838)
        self.assertEqual(len(data_frame_var["obs"]), 2638)

    def test_filtered_data_frame(self):
        filter_ = {
            "filter": {
                "obs": {
                    "annotation_value": [{
                        "name": "n_counts",
                        "min": 3000
                    }]
                }
            }
        }
        data_frame_obs = json.loads(
            self.data.data_frame(filter_["filter"], "obs"))
        self.assertEqual(len(data_frame_obs["var"]), 1838)
        self.assertEqual(len(data_frame_obs["obs"]), 497)
        self.assertIsInstance(data_frame_obs["obs"][0], (list, tuple))
        self.assertEqual(type(data_frame_obs["var"][0]), int)
        data_frame_var = json.loads(
            self.data.data_frame(filter_["filter"], "var"))
        self.assertEqual(len(data_frame_var["var"]), 1838)
        self.assertEqual(len(data_frame_var["obs"]), 497)
        self.assertIsInstance(data_frame_var["var"][0], (list, tuple))
        self.assertEqual(type(data_frame_var["obs"][0]), int)

    def test_data_single_gene(self):
        for axis in ["obs", "var"]:
            filter_ = {
                "filter": {
                    "var": {
                        "annotation_value": [{
                            "name": "name",
                            "values": ["RER1"]
                        }]
                    }
                }
            }
            data_frame_var = json.loads(
                self.data.data_frame(filter_["filter"], axis))
            if axis == "obs":
                self.assertEqual(type(data_frame_var["var"][0]), int)
                self.assertIsInstance(data_frame_var["obs"][0], (list, tuple))
            elif axis == "var":
                self.assertEqual(type(data_frame_var["obs"][0]), int)
                self.assertIsInstance(data_frame_var["var"][0], (list, tuple))

    if __name__ == "__main__":
        unittest.main()