def dataroot_test_index():
    """Render a simple HTML index of all hosted datasets.

    This index page is meant for testing/debugging purposes only. It walks
    every configured dataroot, attempts to load each dataset (so invalid
    ones are filtered out), and returns a sorted list of links.
    """
    import html  # stdlib; escape dataset names for safe HTML output

    config = current_app.app_config
    server_config = config.server_config

    parts = [
        '<!doctype html><html lang="en">',
        "<head><title>Hosted Cellxgene</title></head>",
        "<body><H1>Welcome to cellxgene</H1>",
    ]

    datasets = []
    for dataroot_dict in server_config.multi_dataset__dataroot.values():
        dataroot = dataroot_dict["dataroot"]
        url_dataroot = dataroot_dict["base_url"]
        locator = DataLocator(dataroot, region_name=server_config.data_locator__s3__region_name)
        for fname in locator.ls():
            location = path_join(dataroot, fname)
            try:
                # Instantiating the loader validates the dataset.
                MatrixDataLoader(location, app_config=config)
                datasets.append((url_dataroot, fname))
            except DatasetAccessError:
                # skip over invalid datasets
                pass

    parts.append("<br/>Select one of these datasets...<br/>")
    parts.append("<ul>")
    datasets.sort()
    for url_dataroot, dataset in datasets:
        # BUG FIX: the href attribute was unquoted and the dataset name was
        # interpolated unescaped — names containing spaces or HTML
        # metacharacters produced malformed markup.
        safe_name = html.escape(dataset)
        parts.append(f'<li><a href="{url_dataroot}/{safe_name}">{safe_name}</a></li>')
    parts.append("</ul>")
    parts.append("</body></html>")
    return make_response("".join(parts))
def isvalid(url):
    """
    Return True if this looks like a valid CXG, False if not.
    Just a quick/cheap test, not to be fully trusted.
    """
    ctx = CxgAdaptor.tiledb_ctx
    # Each required member of the CXG, paired with its expected TileDB
    # object type; checked in the same order as the original chain.
    required = (
        (None, "group"),
        ("obs", "array"),
        ("var", "array"),
        ("X", "array"),
        ("emb", "group"),
    )
    for member, expected_type in required:
        target = url if member is None else path_join(url, member)
        if tiledb.object_type(target, ctx=ctx) != expected_type:
            return False
    return True
def get_data_adaptor(url_dataroot=None, dataset=None):
    """Resolve a dataset path and return a cached data adaptor for it.

    With no dataset, serves the single-dataset datapath. Otherwise maps the
    URL dataroot back to its configured filesystem/S3 root and joins the
    dataset name onto it.

    Raises:
        DatasetAccessError: if the dataroot is unknown or the resolved path
            escapes the dataroot.
    """
    config = current_app.app_config
    server_config = config.server_config
    dataset_key = None
    if dataset is None:
        datapath = server_config.single_dataset__datapath
    else:
        dataroot = None
        for key, dataroot_dict in server_config.multi_dataset__dataroot.items():
            if dataroot_dict["base_url"] == url_dataroot:
                dataroot = dataroot_dict["dataroot"]
                dataset_key = key
                break
        if dataroot is None:
            raise DatasetAccessError(f"Invalid dataset {url_dataroot}/{dataset}")

        datapath = path_join(dataroot, dataset)
        # path_join returns a normalized path. Therefore it is
        # sufficient to check that the datapath starts with the
        # dataroot to determine that the datapath is under the dataroot.
        if not datapath.startswith(dataroot):
            # BUG FIX: message was missing the f prefix, so the
            # placeholders were emitted literally.
            raise DatasetAccessError(f"Invalid dataset {url_dataroot}/{dataset}")

    if datapath is None:
        return common_rest.abort_and_log(HTTPStatus.BAD_REQUEST, "Invalid dataset NONE", loglevel=logging.INFO)

    cache_manager = current_app.matrix_data_cache_manager
    return cache_manager.data_adaptor(dataset_key, datapath, config)
def get_data_adaptor(dataset=None):
    """Resolve a dataset path and return a cached data adaptor for it.

    With no dataset, serves the single-dataset datapath; otherwise joins the
    dataset name onto the configured multi-dataset dataroot.

    Raises:
        DatasetAccessError: if the resolved path escapes the dataroot.
    """
    config = current_app.app_config
    if dataset is None:
        datapath = config.single_dataset__datapath
    else:
        datapath = path_join(config.multi_dataset__dataroot, dataset)
        # path_join returns a normalized path. Therefore it is
        # sufficient to check that the datapath starts with the
        # dataroot to determine that the datapath is under the dataroot.
        if not datapath.startswith(config.multi_dataset__dataroot):
            # BUG FIX: message was missing the f prefix, so {dataset}
            # was emitted literally.
            raise DatasetAccessError(f"Invalid dataset {dataset}")

    if datapath is None:
        # No placeholders, so a plain string (the original used a
        # needless f-string here).
        return common_rest.abort_and_log(HTTPStatus.BAD_REQUEST, "Invalid dataset NONE", loglevel=logging.INFO)

    cache_manager = current_app.matrix_data_cache_manager
    return cache_manager.data_adaptor(datapath, config)
def dataset_index(url_dataroot=None, dataset=None):
    """Render the index page for a single dataset.

    Without a dataset, multi-dataset deployments fall back to the dataroot
    listing; single-dataset deployments serve the configured datapath.
    """
    app_config = current_app.app_config
    server_config = app_config.server_config

    if dataset is None:
        if app_config.is_multi_dataset():
            return dataroot_index()
        location = server_config.single_dataset__datapath
    else:
        # Map the URL's dataroot back to its configured root location.
        dataroot = next(
            (d["dataroot"] for d in server_config.multi_dataset__dataroot.values()
             if d["base_url"] == url_dataroot),
            None,
        )
        if dataroot is None:
            abort(HTTPStatus.NOT_FOUND)
        location = path_join(dataroot, dataset)

    dataset_config = app_config.get_dataset_config(url_dataroot)
    scripts = dataset_config.app__scripts
    inline_scripts = dataset_config.app__inline_scripts

    try:
        cache_manager = current_app.matrix_data_cache_manager
        with cache_manager.data_adaptor(url_dataroot, location, app_config) as data_adaptor:
            dataset_title = app_config.get_title(data_adaptor)
            return render_template(
                "index.html",
                datasetTitle=dataset_title,
                SCRIPTS=scripts,
                INLINE_SCRIPTS=inline_scripts,
            )
    except DatasetAccessError as e:
        return common_rest.abort_and_log(
            e.status_code,
            f"Invalid dataset {dataset}: {e.message}",
            loglevel=logging.INFO,
            include_exc_info=True,
        )
def _validate_and_initialize(self):
    """
    remember, preload_validation() has already been called, so
    no need to repeat anything it has done.

    Load the CXG "group" metadata and cache instance values.
    Be very aware of multiple versions of the CXG object.

    CXG versions in the wild:
    * version 0, aka "no version" -- can be detected by the lack
      of a cxg_group_metadata array.
    * version 0.1 -- metadata attache to cxg_group_metadata array.
      Same as 0, except it adds group metadata.
    """
    # Pre-initialize so every code path below leaves these bound
    # (BUG FIX: previously an unexpected object type, or a v>0 array
    # reporting version "0.0", raised UnboundLocalError instead of
    # DatasetAccessError).
    title = None
    about = None

    a_type = tiledb.object_type(path_join(self.url, "cxg_group_metadata"), ctx=self.tiledb_ctx)
    if a_type is None:
        # version 0
        cxg_version = "0.0"
    elif a_type == "array":
        # version >0
        gmd = self.open_array("cxg_group_metadata")
        cxg_version = gmd.meta["cxg_version"]
        if cxg_version == "0.1":
            cxg_properties = json.loads(gmd.meta["cxg_properties"])
            title = cxg_properties.get("title", None)
            about = cxg_properties.get("about", None)
    else:
        # cxg_group_metadata exists but is not an array (e.g. a group):
        # this is not a CXG we understand.
        raise DatasetAccessError(f"cxg matrix is not valid: {self.url}")

    if cxg_version not in ["0.0", "0.1"]:
        raise DatasetAccessError(f"cxg matrix is not valid: {self.url}")

    self.title = title
    self.about = about
    self.cxg_version = cxg_version
def dataset_index(dataset=None):
    """Render the index page for a dataset.

    Without a dataset, serves the single-dataset datapath when configured,
    otherwise falls back to the dataroot listing.
    """
    config = current_app.app_config

    if dataset is not None:
        location = path_join(config.multi_dataset__dataroot, dataset)
    elif config.single_dataset__datapath:
        location = config.single_dataset__datapath
    else:
        return dataroot_index()

    scripts = config.server__scripts
    try:
        cache_manager = current_app.matrix_data_cache_manager
        with cache_manager.data_adaptor(location, config) as data_adaptor:
            dataset_title = config.get_title(data_adaptor)
            return render_template("index.html", datasetTitle=dataset_title, SCRIPTS=scripts)
    except DatasetAccessError:
        return common_rest.abort_and_log(
            HTTPStatus.BAD_REQUEST,
            f"Invalid dataset {dataset}",
            loglevel=logging.INFO,
            include_exc_info=True,
        )
def get_path(self, *urls):
    """Join the given path components onto this adaptor's base URL."""
    joined = path_join(self.url, *urls)
    return joined
def has_array(self, name):
    """Return True iff *name* is a TileDB array under this adaptor's URL."""
    target = path_join(self.url, name)
    return tiledb.object_type(target, ctx=self.tiledb_ctx) == "array"