def data_with_tmp_annotations(ext: MatrixDataType, annotations_fixture=False):
    """Open the pbmc3k dataset together with a scratch local-file annotations backend.

    Args:
        ext: Selects which on-disk flavour of pbmc3k is opened
            (``MatrixDataType.H5AD`` or ``MatrixDataType.CXG``).
        annotations_fixture: When true, the pbmc3k annotations fixture CSV is
            copied into the scratch directory before the dataset is opened.

    Returns:
        A ``(data, tmp_dir, annotations)`` tuple: the opened dataset, the newly
        created temporary directory, and an ``AnnotationsLocalFile`` pointing at
        ``test_annotations.csv`` inside that directory. The directory is not
        removed by this function.

    Raises:
        KeyError: If ``ext`` is not one of the two supported matrix types.
    """
    scratch_dir = tempfile.mkdtemp()
    csv_path = path.join(scratch_dir, "test_annotations.csv")
    if annotations_fixture:
        shutil.copyfile(f"{FIXTURES_ROOT}/pbmc3k-annotations.csv", csv_path)

    # One dataset file per supported matrix type; indexing fails for anything else.
    dataset_by_type = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
        MatrixDataType.CXG: f"{FIXTURES_ROOT}/pbmc3k.cxg",
    }
    locator = DataLocator(dataset_by_type[ext])

    app_cfg = AppConfig()
    server_settings = {
        "app__flask_secret_key": "secret",
        "single_dataset__obs_names": None,
        "single_dataset__var_names": None,
        "single_dataset__datapath": locator.path,
    }
    app_cfg.update_server_config(**server_settings)
    dataset_settings = {
        "embeddings__names": ["umap"],
        "presentation__max_categories": 100,
        "diffexp__lfc_cutoff": 0.01,
    }
    app_cfg.update_default_dataset_config(**dataset_settings)
    app_cfg.complete_config()

    loader = MatrixDataLoader(locator.abspath())
    opened = loader.open(app_cfg)
    local_annotations = AnnotationsLocalFile(None, csv_path)
    return opened, scratch_dir, local_annotations
def data_with_tmp_tiledb_annotations(ext: MatrixDataType):
    """Open the pbmc3k dataset together with a hosted-TileDB annotations backend.

    Args:
        ext: Selects which on-disk flavour of pbmc3k is opened
            (``MatrixDataType.H5AD`` or ``MatrixDataType.CXG``).

    Returns:
        A ``(data, tmp_dir, annotations)`` tuple: the opened dataset, the newly
        created temporary directory used as the hosted file directory, and the
        ``AnnotationsHostedTileDB`` instance built on it. The directory is not
        removed by this function.

    Raises:
        KeyError: If ``ext`` is not one of the two supported matrix types.
    """
    scratch_dir = tempfile.mkdtemp()

    # NOTE: the CXG entry is a relative path, so it resolves against the
    # current working directory.
    dataset_by_type = {
        MatrixDataType.H5AD: f"{PROJECT_ROOT}/example-dataset/pbmc3k.h5ad",
        MatrixDataType.CXG: "test/fixtures/pbmc3k.cxg",
    }
    locator = DataLocator(dataset_by_type[ext])

    app_cfg = AppConfig()
    server_settings = {
        "app__flask_secret_key": "secret",
        "multi_dataset__dataroot": locator.path,
        "authentication__type": "test",
        "authentication__insecure_test_environment": True,
    }
    app_cfg.update_server_config(**server_settings)
    dataset_settings = {
        "embeddings__names": ["umap"],
        "presentation__max_categories": 100,
        "diffexp__lfc_cutoff": 0.01,
        "user_annotations__type": "hosted_tiledb_array",
        "user_annotations__hosted_tiledb_array__db_uri": "postgresql://*****:*****@localhost:5432",
        "user_annotations__hosted_tiledb_array__hosted_file_directory": scratch_dir,
    }
    app_cfg.update_default_dataset_config(**dataset_settings)
    app_cfg.complete_config()

    loader = MatrixDataLoader(locator.abspath())
    opened = loader.open(app_cfg)

    db_handle = DbUtils("postgresql://*****:*****@localhost:5432")
    hosted_annotations = AnnotationsHostedTileDB(scratch_dir, db_handle)
    return opened, scratch_dir, hosted_annotations
def get_basic_config(self):
    """Build an ``AppConfig`` carrying the baseline settings shared by the tests.

    The returned config has not had ``complete_config()`` called on it.

    Returns:
        The partially populated ``AppConfig``.
    """
    basic = AppConfig()

    # Server-side settings are applied in two separate updates, as before.
    name_settings = {"single_dataset__obs_names": None, "single_dataset__var_names": None}
    basic.update_server_config(**name_settings)
    basic.update_server_config(app__flask_secret_key="secret")

    dataset_settings = {
        "embeddings__names": ["umap"],
        "presentation__max_categories": 100,
        "diffexp__lfc_cutoff": 0.01,
    }
    basic.update_default_dataset_config(**dataset_settings)
    return basic
def app_config(data_locator, backed=False, extra_server_config=None, extra_dataset_config=None):
    """Build and complete an ``AppConfig`` for a single-dataset test server.

    Args:
        data_locator: Value stored as ``single_dataset__datapath``.
        backed: Value stored as ``adaptor__anndata_adaptor__backed``.
        extra_server_config: Optional mapping of additional server-config
            keyword settings, applied after the base server settings.
            ``None`` (the default) means no extra settings.
        extra_dataset_config: Optional mapping of additional default-dataset
            keyword settings, applied after the base dataset settings.
            ``None`` (the default) means no extra settings.

    Returns:
        The ``AppConfig`` after ``complete_config()`` has been called.
    """
    # ``None`` sentinels replace the former ``{}`` defaults so that no dict
    # object is shared between calls.
    server_overrides = {} if extra_server_config is None else extra_server_config
    dataset_overrides = {} if extra_dataset_config is None else extra_dataset_config

    config = AppConfig()
    config.update_server_config(
        app__flask_secret_key="secret",
        single_dataset__obs_names=None,
        single_dataset__var_names=None,
        adaptor__anndata_adaptor__backed=backed,
        single_dataset__datapath=data_locator,
        limits__diffexp_cellcount_max=None,
        limits__column_request_max=None,
    )
    config.update_default_dataset_config(
        embeddings__names=["umap", "tsne", "pca"],
        presentation__max_categories=100,
        diffexp__lfc_cutoff=0.01,
    )
    # Caller-supplied settings go last, in the same order as before.
    config.update_server_config(**server_overrides)
    config.update_default_dataset_config(**dataset_overrides)
    config.complete_config()
    return config
def test_auth_none(self):
    # With authentication disabled, the config endpoint must not advertise an
    # "authentication" section and the userinfo endpoint must return null.
    cfg = AppConfig()
    cfg.update_server_config(app__flask_secret_key="secret")
    cfg.update_server_config(
        authentication__type=None,
        multi_dataset__dataroot=self.dataset_dataroot,
    )
    cfg.update_default_dataset_config(user_annotations__enable=False)
    cfg.complete_config()

    with test_server(app_config=cfg) as server:
        client = requests.Session()
        config_reply = client.get(f"{server}/d/pbmc3k.cxg/api/v0.2/config")
        config_json = config_reply.json()
        userinfo_reply = client.get(f"{server}/d/pbmc3k.cxg/api/v0.2/userinfo")
        userinfo_json = userinfo_reply.json()

        self.assertNotIn("authentication", config_json["config"])
        self.assertIsNone(userinfo_json)
def test_auth_session(self):
    # With "session" authentication, the client is not required to log in and
    # the userinfo endpoint reports an authenticated user named "anonymous".
    cfg = AppConfig()
    cfg.update_server_config(app__flask_secret_key="secret")
    cfg.update_server_config(
        authentication__type="session",
        multi_dataset__dataroot=self.dataset_dataroot,
    )
    cfg.update_default_dataset_config(user_annotations__enable=True)
    cfg.complete_config()

    with test_server(app_config=cfg) as server:
        client = requests.Session()
        config_reply = client.get(f"{server}/d/pbmc3k.cxg/api/v0.2/config")
        config_json = config_reply.json()
        userinfo_reply = client.get(f"{server}/d/pbmc3k.cxg/api/v0.2/userinfo")
        userinfo_json = userinfo_reply.json()

        auth_section = config_json["config"]["authentication"]
        self.assertFalse(auth_section["requires_client_login"])

        user = userinfo_json["userinfo"]
        self.assertTrue(user["is_authenticated"])
        self.assertEqual(user["username"], "anonymous")
def test_update_app_config(self):
    # Server-config updates are reported as (name, new value, default) triples.
    cfg = AppConfig()
    cfg.update_server_config(app__verbose=True, multi_dataset__dataroot="datadir")
    expected_server_changes = [
        ("app__verbose", True, False),
        ("multi_dataset__dataroot", "datadir", None),
    ]
    self.assertCountEqual(cfg.server_config.changes_from_default(), expected_server_changes)

    # Updating only the dataset config leaves the server config at its defaults.
    cfg = AppConfig()
    cfg.update_default_dataset_config(app__scripts=(), app__inline_scripts=())
    self.assertCountEqual(cfg.server_config.changes_from_default(), [])

    # Empty lists for the script settings are not reported as changes.
    cfg = AppConfig()
    cfg.update_default_dataset_config(app__scripts=[], app__inline_scripts=[])
    self.assertCountEqual(cfg.default_dataset_config.changes_from_default(), [])

    # A tuple and a list of scripts are both reported as lists.
    cfg = AppConfig()
    cfg.update_default_dataset_config(app__scripts=("a", "b"), app__inline_scripts=["c", "d"])
    expected_dataset_changes = [
        ("app__scripts", ["a", "b"], []),
        ("app__inline_scripts", ["c", "d"], []),
    ]
    self.assertCountEqual(cfg.default_dataset_config.changes_from_default(), expected_dataset_changes)
def launch(
    datapath,
    dataroot,
    verbose,
    debug,
    open_browser,
    port,
    host,
    embedding,
    obs_names,
    var_names,
    max_category_items,
    disable_custom_colors,
    diffexp_lfc_cutoff,
    title,
    scripts,
    about,
    disable_annotations,
    annotations_file,
    annotations_dir,
    backed,
    disable_diffexp,
    experimental_annotations_ontology,
    experimental_annotations_ontology_obo,
    experimental_enable_reembedding,
    config_file,
    dump_default_config,
):
    """Launch the cellxgene data viewer.
    This web app lets you explore single-cell expression data.
    Data must be in a format that cellxgene expects.
    Read the "getting started" guide to learn more:
    https://chanzuckerberg.github.io/cellxgene/getting-started.html

    Examples:

    > cellxgene launch example-dataset/pbmc3k.h5ad --title pbmc3k

    > cellxgene launch <your data file> --title <your title>

    > cellxgene launch <url>"""

    # NOTE(review): the docstring above is presumably rendered as CLI help text
    # (the decorators are not visible here), so parameter notes are kept in
    # comments instead of the docstring.
    #
    # Every parameter except `config_file` and `dump_default_config` is
    # forwarded to a server or default-dataset config key below.
    # `config_file`, when truthy, is loaded first and then overridden by any
    # command-line value that differs from the built-in default.
    # `dump_default_config`, when truthy, prints `default_config` and exits.

    # TODO Examples to provide when "--dataroot" is unhidden
    # > cellxgene launch --dataroot example-dataset/
    #
    # > cellxgene launch --dataroot <url>

    if dump_default_config:
        print(default_config)
        sys.exit(0)

    # Startup message
    click.echo("[cellxgene] Starting the CLI...")

    # app config
    app_config = AppConfig()
    server_config = app_config.server_config

    try:
        if config_file:
            app_config.update_from_config_file(config_file)

        # Determine which config options were given on the command line.
        # Those will override the ones provided in the config file (if provided).
        # A throwaway AppConfig receives all CLI values; only the entries that
        # differ from the defaults are then copied onto the real app_config.
        cli_config = AppConfig()
        cli_config.update_server_config(
            app__verbose=verbose,
            app__debug=debug,
            app__host=host,
            app__port=port,
            app__open_browser=open_browser,
            single_dataset__datapath=datapath,
            single_dataset__title=title,
            single_dataset__about=about,
            single_dataset__obs_names=obs_names,
            single_dataset__var_names=var_names,
            multi_dataset__dataroot=dataroot,
            adaptor__anndata_adaptor__backed=backed,
        )
        cli_config.update_default_dataset_config(
            app__scripts=scripts,
            user_annotations__enable=not disable_annotations,
            user_annotations__local_file_csv__file=annotations_file,
            user_annotations__local_file_csv__directory=annotations_dir,
            user_annotations__ontology__enable=experimental_annotations_ontology,
            user_annotations__ontology__obo_location=experimental_annotations_ontology_obo,
            presentation__max_categories=max_category_items,
            presentation__custom_colors=not disable_custom_colors,
            embeddings__names=embedding,
            embeddings__enable_reembedding=experimental_enable_reembedding,
            diffexp__enable=not disable_diffexp,
            diffexp__lfc_cutoff=diffexp_lfc_cutoff,
        )

        # changes_from_default() yields 3-tuples; the third element (unused
        # here) is dropped and the first two become keyword overrides.
        diff = cli_config.server_config.changes_from_default()
        changes = {key: val for key, val, _ in diff}
        app_config.update_server_config(**changes)

        diff = cli_config.default_dataset_config.changes_from_default()
        changes = {key: val for key, val, _ in diff}
        app_config.update_default_dataset_config(**changes)

        # process the configuration
        # any errors will be thrown as an exception.
        # any info messages will be passed to the messagefn function.

        def messagefn(message):
            click.echo("[cellxgene] " + message)

        # Use a default secret if one is not provided
        # NOTE(review): this fallback is a fixed, hard-coded string.
        if not server_config.app__flask_secret_key:
            app_config.update_server_config(
                app__flask_secret_key="SparkleAndShine")

        app_config.complete_config(messagefn)

    except (ConfigurationError, DatasetAccessError) as e:
        # Configuration problems are re-raised as a click exception.
        raise click.ClickException(e)

    handle_scripts(scripts)

    # create the server
    server = CliLaunchServer(app_config)

    if not server_config.app__verbose:
        # Only errors from the "werkzeug" logger are shown unless verbose is set.
        log = logging.getLogger("werkzeug")
        log.setLevel(logging.ERROR)

    cellxgene_url = f"http://{app_config.server_config.app__host}:{app_config.server_config.app__port}"
    if server_config.app__open_browser:
        click.echo(
            f"[cellxgene] Launching! Opening your browser to {cellxgene_url} now."
        )
        webbrowser.open(cellxgene_url)
    else:
        click.echo(
            f"[cellxgene] Launching! Please go to {cellxgene_url} in your browser."
        )

    click.echo("[cellxgene] Type CTRL-C at any time to exit.")

    if not server_config.app__verbose:
        # Non-verbose mode sends everything written to sys.stdout to the null
        # device from here on.
        # NOTE(review): the handle is never closed and sys.stdout is never
        # restored within this function.
        f = open(os.devnull, "w")
        sys.stdout = f

    try:
        server.app.run(
            host=server_config.app__host,
            debug=server_config.app__debug,
            port=server_config.app__port,
            threaded=not server_config.app__debug,
            use_debugger=False,
            use_reloader=False,
        )
    except OSError as e:
        # An address-in-use failure gets a friendlier message; any other
        # OSError propagates unchanged.
        if e.errno == errno.EADDRINUSE:
            raise click.ClickException(
                "Port is in use, please specify an open port using the --port flag."
            ) from e
        raise
def test_multi_dataset(self):
    # Covers three things in sequence on one AppConfig: rejection of illegal
    # base_urls, acceptance of legal ones, and an end-to-end check that
    # per-dataroot overrides reach the served /config payload.
    config = AppConfig()

    def single_tag_dataroot(base_url):
        # One-entry dataroot mapping used by the base_url validation loops.
        return {"tag": {"base_url": base_url, "dataroot": f"{PROJECT_ROOT}/example-dataset"}}

    # test for illegal url_dataroots
    for bad_base_url in ("../b", "!$*", "\\n", "", "(bad)"):
        config.update_server_config(
            app__flask_secret_key="secret",
            multi_dataset__dataroot=single_tag_dataroot(bad_base_url),
        )
        with self.assertRaises(ConfigurationError):
            config.complete_config()

    # test for legal url_dataroots
    for good_base_url in ("d", "this.is-okay_", "a/b"):
        config.update_server_config(
            app__flask_secret_key="secret",
            multi_dataset__dataroot=single_tag_dataroot(good_base_url),
        )
        config.complete_config()

    # test that multi dataroots work end to end
    three_roots = {
        "s1": {"dataroot": f"{PROJECT_ROOT}/example-dataset", "base_url": "set1/1/2"},
        "s2": {"dataroot": f"{FIXTURES_ROOT}", "base_url": "set2"},
        "s3": {"dataroot": f"{FIXTURES_ROOT}", "base_url": "set3"},
    }
    config.update_server_config(
        app__flask_secret_key="secret",
        multi_dataset__dataroot=three_roots,
    )

    # Change this default to test if the dataroot overrides below work.
    config.update_default_dataset_config(app__about_legal_tos="tos_default.html")

    # specialize the configs for set1
    config.add_dataroot_config(
        "s1",
        user_annotations__enable=False,
        diffexp__enable=True,
        app__about_legal_tos="tos_set1.html",
    )

    # specialize the configs for set2
    config.add_dataroot_config(
        "s2",
        user_annotations__enable=True,
        diffexp__enable=False,
        app__about_legal_tos="tos_set2.html",
    )

    # no specializations for set3 (they get the default dataset config)
    config.complete_config()

    with test_server(app_config=config) as server:
        client = requests.Session()

        # set1: annotations off, diffexp on, its own terms-of-service page
        set1 = client.get(f"{server}/set1/1/2/pbmc3k.h5ad/api/v0.2/config").json()
        assert set1["config"]["displayNames"]["dataset"] == "pbmc3k"
        assert set1["config"]["parameters"]["annotations"] is False
        assert set1["config"]["parameters"]["disable-diffexp"] is False
        assert set1["config"]["parameters"]["about_legal_tos"] == "tos_set1.html"

        # set2: annotations on, its own terms-of-service page
        set2 = client.get(f"{server}/set2/pbmc3k.cxg/api/v0.2/config").json()
        assert set2["config"]["displayNames"]["dataset"] == "pbmc3k"
        assert set2["config"]["parameters"]["annotations"] is True
        assert set2["config"]["parameters"]["about_legal_tos"] == "tos_set2.html"

        # set3: falls back to the default dataset config
        set3 = client.get(f"{server}/set3/pbmc3k.cxg/api/v0.2/config").json()
        assert set3["config"]["displayNames"]["dataset"] == "pbmc3k"
        assert set3["config"]["parameters"]["annotations"] is True
        assert set3["config"]["parameters"]["disable-diffexp"] is False
        assert set3["config"]["parameters"]["about_legal_tos"] == "tos_default.html"

        health = client.get(f"{server}/health")
        assert health.json()["status"] == "pass"
has_config = True else: logging.critical(f"Configuration file not found {config_file}") sys.exit(1) if not has_config: logging.critical("No config file found") sys.exit(1) dataroot = os.getenv("CXG_DATAROOT") if dataroot: logging.info("Configuration from CXG_DATAROOT") app_config.update_server_config(multi_dataset__dataroot=dataroot) # overwrite configuration for the eb app app_config.update_default_dataset_config(embeddings__enable_reembedding=False,) app_config.update_server_config(multi_dataset__allowed_matrix_types=["cxg"],) # complete config app_config.complete_config(logging.info) server = WSGIServer(app_config) debug = False application = server.app except Exception: logging.critical("Caught exception during initialization", exc_info=True) sys.exit(1) if app_config.is_multi_dataset(): logging.info(f"starting server with multi_dataset__dataroot={app_config.server_config.multi_dataset__dataroot}")
class TestServerConfig(ConfigTests):
    # Unit tests for the server section of AppConfig: loading from generated
    # config files, the individual handle_* steps, and environment overrides.

    def setUp(self):
        # Name the per-test config file after the running test method.
        self.config_file_name = f"{unittest.TestCase.id(self).split('.')[-1]}.yml"
        self.config = AppConfig()
        self.config.update_server_config(app__flask_secret_key="secret")
        self.config.update_server_config(multi_dataset__dataroot=FIXTURES_ROOT)
        self.server_config = self.config.server_config
        self.config.complete_config()

        message_list = []

        # Despite its name, this callback records every message it receives.
        def noop(message):
            message_list.append(message)

        messagefn = noop
        # Context passed to the handle_* / complete_config calls in the tests.
        self.context = dict(messagefn=messagefn, messages=message_list)

    def get_config(self, **kwargs):
        # Write a config file (FIXTURES_ROOT as dataroot plus the given
        # overrides) and return a fresh, not-yet-completed AppConfig loaded from it.
        file_name = self.custom_app_config(
            dataroot=f"{FIXTURES_ROOT}", config_file_name=self.config_file_name, **kwargs
        )
        config = AppConfig()
        config.update_from_config_file(file_name)
        return config

    def test_init_raises_error_if_default_config_is_invalid(self):
        # A non-numeric port must be rejected when the config is completed.
        invalid_config = self.get_config(port="not_valid")
        with self.assertRaises(ConfigurationError):
            invalid_config.complete_config()

    @patch("backend.czi_hosted.common.config.server_config.BaseConfig.validate_correct_type_of_configuration_attribute")
    def test_complete_config_checks_all_attr(self, mock_check_attrs):
        # NOTE(review): the right-hand side is *called* here; if BaseConfig is
        # the patched class, this calls the mock rather than the real
        # validator - confirm this is intended.
        mock_check_attrs.side_effect = BaseConfig.validate_correct_type_of_configuration_attribute()
        self.server_config.complete_config(self.context)
        # Hard-coded count of validated attributes; must be updated when
        # server config attributes are added or removed.
        self.assertEqual(mock_check_attrs.call_count, 41)

    def test_handle_app__throws_error_if_port_doesnt_exist(self):
        # An out-of-range port number must be rejected by handle_app.
        config = self.get_config(port=99999999)
        with self.assertRaises(ConfigurationError):
            config.server_config.handle_app(self.context)

    @patch("backend.czi_hosted.common.config.server_config.discover_s3_region_name")
    def test_handle_data_locator_works_for_default_types(self, mock_discover_region_name):
        mock_discover_region_name.return_value = None
        # Default config
        self.assertEqual(self.config.server_config.data_locator__s3__region_name, None)
        # hard coded
        config = self.get_config()
        self.assertEqual(config.server_config.data_locator__s3__region_name, "us-east-1")
        # incorrectly formatted
        dataroot = {
            "d1": {"base_url": "set1", "dataroot": "/path/to/set1_datasets/"},
            "d2": {"base_url": "set2/subdir", "dataroot": "s3://shouldnt/work"},
        }
        file_name = self.custom_app_config(
            dataroot=dataroot, config_file_name=self.config_file_name, data_locater_region_name="true"
        )
        config = AppConfig()
        config.update_from_config_file(file_name)
        # With region discovery mocked to return None, handle_data_locator must fail.
        with self.assertRaises(ConfigurationError):
            config.server_config.handle_data_locator()

    @patch("backend.czi_hosted.common.config.server_config.discover_s3_region_name")
    def test_handle_data_locator_can_read_from_dataroot(self, mock_discover_region_name):
        mock_discover_region_name.return_value = "us-west-2"
        dataroot = {
            "d1": {"base_url": "set1", "dataroot": "/path/to/set1_datasets/"},
            "d2": {"base_url": "set2/subdir", "dataroot": "s3://hosted-cellxgene-dev"},
        }
        file_name = self.custom_app_config(
            dataroot=dataroot, config_file_name=self.config_file_name, data_locater_region_name="true"
        )
        config = AppConfig()
        config.update_from_config_file(file_name)
        config.server_config.handle_data_locator()
        # The region comes from the mocked discovery, which is called once
        # with the s3 dataroot.
        self.assertEqual(config.server_config.data_locator__s3__region_name, "us-west-2")
        mock_discover_region_name.assert_called_once_with("s3://hosted-cellxgene-dev")

    def test_handle_app___can_use_envar_port(self):
        config = self.get_config(port=24)
        self.assertEqual(config.server_config.app__port, 24)

        # Note if the port is set in the config file it will NOT be overwritten by a different envvar
        # NOTE(review): if an assertion below fails, the `del` is skipped and
        # CXG_SERVER_PORT leaks into later tests.
        os.environ["CXG_SERVER_PORT"] = "4008"
        self.config = AppConfig()
        self.config.update_server_config(app__flask_secret_key="secret")
        self.config.server_config.handle_app(self.context)
        self.assertEqual(self.config.server_config.app__port, 4008)
        del os.environ["CXG_SERVER_PORT"]

    def test_handle_app__can_get_secret_key_from_envvar_or_config_file_with_envvar_given_preference(self):
        config = self.get_config(flask_secret_key="KEY_FROM_FILE")
        self.assertEqual(config.server_config.app__flask_secret_key, "KEY_FROM_FILE")

        # NOTE(review): CXG_SECRET_KEY is never removed from os.environ, so it
        # stays set for the rest of the test process.
        os.environ["CXG_SECRET_KEY"] = "KEY_FROM_ENV"
        config.external_config.handle_environment(self.context)
        self.assertEqual(config.server_config.app__flask_secret_key, "KEY_FROM_ENV")

    def test_handle_app__sets_web_base_url(self):
        # The web_base_url from the config file is stored unchanged.
        config = self.get_config(web_base_url="anything.com")
        self.assertEqual(config.server_config.app__web_base_url, "anything.com")

    def test_handle_auth__gets_client_secret_from_envvars_or_config_with_envvars_given_preference(self):
        config = self.get_config(client_secret="KEY_FROM_FILE")
        config.server_config.handle_authentication()
        self.assertEqual(config.server_config.authentication__params_oauth__client_secret, "KEY_FROM_FILE")

        # NOTE(review): CXG_OAUTH_CLIENT_SECRET is never removed from
        # os.environ, so it stays set for the rest of the test process.
        os.environ["CXG_OAUTH_CLIENT_SECRET"] = "KEY_FROM_ENV"
        config.external_config.handle_environment(self.context)
        self.assertEqual(config.server_config.authentication__params_oauth__client_secret, "KEY_FROM_ENV")

    def test_handle_data_source__errors_when_passed_zero_or_two_dataroots(self):
        # Case 1: both a dataroot and a single-dataset datapath are supplied.
        file_name = self.custom_app_config(
            dataroot=f"{FIXTURES_ROOT}",
            config_file_name="two_data_roots.yml",
            dataset_datapath=f"{FIXTURES_ROOT}/pbmc3k-CSC-gz.h5ad",
        )
        config = AppConfig()
        config.update_from_config_file(file_name)
        with self.assertRaises(ConfigurationError):
            config.server_config.handle_data_source()

        # Case 2: neither a dataroot nor a datapath is supplied.
        file_name = self.custom_app_config(config_file_name="zero_roots.yml")
        config = AppConfig()
        config.update_from_config_file(file_name)
        with self.assertRaises(ConfigurationError):
            config.server_config.handle_data_source()

    def test_get_api_base_url_works(self):
        # test the api_base_url feature, and that it can contain a path
        config = AppConfig()
        backend_port = find_available_port("localhost", 10000)
        # NOTE(review): in this view the text from the URL below up to the
        # @patch decorator of test_handle_diffexp has been replaced by a
        # credential-scrubbing mask ("*****:*****@"). The rest of this test
        # and the start of that decorator are not visible; the span is kept
        # verbatim and must be restored from the unscrubbed file.
        config.update_server_config(
            app__flask_secret_key="secret",
            app__api_base_url=f"http://*****:*****@patch("backend.czi_hosted.common.config.server_config.diffexp_tiledb.set_config")
    def test_handle_diffexp(self, mock_tiledb_config):
        custom_config_file = self.custom_app_config(
            dataroot=f"{FIXTURES_ROOT}",
            cpu_multiplier=3,
            diffexp_max_workers=1,
            target_workunit=4,
            config_file_name=self.config_file_name,
        )
        config = AppConfig()
        config.update_from_config_file(custom_config_file)
        config.server_config.handle_diffexp()
        # called with the min of diffexp_max_workers and cpus*cpu_multiplier
        mock_tiledb_config.assert_called_once_with(1, 4)

    @patch("backend.czi_hosted.data_cxg.cxg_adaptor.CxgAdaptor.set_tiledb_context")
    def test_handle_adaptor(self, mock_tiledb_context):
        custom_config = self.custom_app_config(
            dataroot=f"{FIXTURES_ROOT}", cxg_tile_cache_size=10, cxg_num_reader_threads=2
        )
        config = AppConfig()
        config.update_from_config_file(custom_config)
        config.server_config.handle_adaptor()
        # The tile cache size and reader thread count from the file, plus the
        # "us-east-1" region, are passed to the TileDB context.
        mock_tiledb_context.assert_called_once_with(
            {"sm.tile_cache_size": 10, "sm.num_reader_threads": 2, "vfs.s3.region": "us-east-1"}
        )

    def test_test_auth_only_in_insecure(self):
        # The "test" auth type is rejected until the insecure-test-environment
        # flag is set.
        config = self.get_config(auth_type="test")
        with self.assertRaises(ConfigurationError):
            config.complete_config()
        config.update_server_config(authentication__insecure_test_environment=True)
        config.complete_config()