Пример #1
0
def dataroot_test_index():
    """
    Render a minimal HTML index of every dataset under every configured
    dataroot.  Meant for testing/debugging purposes only.
    """
    data = '<!doctype html><html lang="en">'
    data += "<head><title>Hosted Cellxgene</title></head>"
    data += "<body><H1>Welcome to cellxgene</H1>"

    config = current_app.app_config
    server_config = config.server_config
    datasets = []
    for dataroot_dict in server_config.multi_dataset__dataroot.values():
        dataroot = dataroot_dict["dataroot"]
        url_dataroot = dataroot_dict["base_url"]
        locator = DataLocator(
            dataroot, region_name=server_config.data_locator__s3__region_name)
        for fname in locator.ls():
            location = path_join(dataroot, fname)
            try:
                # probe the dataset; the loader raises DatasetAccessError
                # if the location is not a readable matrix
                MatrixDataLoader(location, app_config=config)
                datasets.append((url_dataroot, fname))
            except DatasetAccessError:
                # skip over invalid datasets
                pass

    data += "<br/>Select one of these datasets...<br/>"
    data += "<ul>"
    datasets.sort()
    for url_dataroot, dataset in datasets:
        # BUG FIX: the href value must be quoted, otherwise dataset names
        # containing spaces or other HTML-special characters break the link
        data += f'<li><a href="{url_dataroot}/{dataset}">{dataset}</a></li>'
    data += "</ul>"
    data += "</body></html>"

    return make_response(data)
Пример #2
0
 def isvalid(url):
     """
     Return True if this looks like a valid CXG, False if not.  Just a quick/cheap
     test, not to be fully trusted.
     """
     # The CXG root itself must be a tiledb group.
     if tiledb.object_type(url, ctx=CxgAdaptor.tiledb_ctx) != "group":
         return False
     # Required members and the tiledb object type each must have.
     required = {
         "obs": "array",
         "var": "array",
         "X": "array",
         "emb": "group",
     }
     return all(
         tiledb.object_type(path_join(url, member), ctx=CxgAdaptor.tiledb_ctx) == kind
         for member, kind in required.items()
     )
Пример #3
0
def get_data_adaptor(url_dataroot=None, dataset=None):
    """
    Return a data adaptor for the requested dataset.

    If dataset is None, the server's single-dataset datapath is used.
    Otherwise the dataroot whose base_url matches url_dataroot is located
    and the dataset path is resolved beneath it.

    Raises DatasetAccessError if the dataroot is unknown or the resolved
    path escapes the dataroot.
    """
    config = current_app.app_config
    server_config = config.server_config
    dataset_key = None

    if dataset is None:
        datapath = server_config.single_dataset__datapath
    else:
        dataroot = None
        for key, dataroot_dict in server_config.multi_dataset__dataroot.items(
        ):
            if dataroot_dict["base_url"] == url_dataroot:
                dataroot = dataroot_dict["dataroot"]
                dataset_key = key
                break

        if dataroot is None:
            raise DatasetAccessError(
                f"Invalid dataset {url_dataroot}/{dataset}")
        datapath = path_join(dataroot, dataset)
        # path_join returns a normalized path.  Therefore it is
        # sufficient to check that the datapath starts with the
        # dataroot to determine that the datapath is under the dataroot.
        if not datapath.startswith(dataroot):
            # BUG FIX: message previously lacked the f prefix and rendered
            # the literal text "{url_dataroot}/{dataset}"
            raise DatasetAccessError(
                f"Invalid dataset {url_dataroot}/{dataset}")

    if datapath is None:
        return common_rest.abort_and_log(HTTPStatus.BAD_REQUEST,
                                         "Invalid dataset NONE",
                                         loglevel=logging.INFO)

    cache_manager = current_app.matrix_data_cache_manager
    return cache_manager.data_adaptor(dataset_key, datapath, config)
Пример #4
0
def get_data_adaptor(dataset=None):
    """
    Return a data adaptor for the requested dataset, or for the configured
    single-dataset datapath when dataset is None.

    Raises DatasetAccessError if the resolved path escapes the dataroot.
    """
    config = current_app.app_config

    if dataset is None:
        datapath = config.single_dataset__datapath
    else:
        datapath = path_join(config.multi_dataset__dataroot, dataset)
        # path_join returns a normalized path.  Therefore it is
        # sufficient to check that the datapath starts with the
        # dataroot to determine that the datapath is under the dataroot.
        if not datapath.startswith(config.multi_dataset__dataroot):
            # BUG FIX: message previously lacked the f prefix and rendered
            # the literal text "{dataset}"
            raise DatasetAccessError(f"Invalid dataset {dataset}")

    if datapath is None:
        return common_rest.abort_and_log(HTTPStatus.BAD_REQUEST,
                                         "Invalid dataset NONE",
                                         loglevel=logging.INFO)

    cache_manager = current_app.matrix_data_cache_manager
    return cache_manager.data_adaptor(datapath, config)
Пример #5
0
def dataset_index(url_dataroot=None, dataset=None):
    """
    Render the index page for a dataset.  With no dataset argument, either
    delegate to the dataroot listing (multi-dataset mode) or fall back to
    the configured single dataset.
    """
    app_config = current_app.app_config
    server_config = app_config.server_config

    if dataset is not None:
        # resolve the dataroot whose base_url matches the request
        dataroot = None
        for dataroot_dict in server_config.multi_dataset__dataroot.values():
            if dataroot_dict["base_url"] == url_dataroot:
                dataroot = dataroot_dict["dataroot"]
                break
        if dataroot is None:
            abort(HTTPStatus.NOT_FOUND)
        location = path_join(dataroot, dataset)
    elif app_config.is_multi_dataset():
        return dataroot_index()
    else:
        location = server_config.single_dataset__datapath

    dataset_config = app_config.get_dataset_config(url_dataroot)
    scripts = dataset_config.app__scripts
    inline_scripts = dataset_config.app__inline_scripts

    try:
        cache_manager = current_app.matrix_data_cache_manager
        with cache_manager.data_adaptor(url_dataroot, location,
                                        app_config) as data_adaptor:
            return render_template(
                "index.html",
                datasetTitle=app_config.get_title(data_adaptor),
                SCRIPTS=scripts,
                INLINE_SCRIPTS=inline_scripts,
            )
    except DatasetAccessError as e:
        return common_rest.abort_and_log(
            e.status_code,
            f"Invalid dataset {dataset}: {e.message}",
            loglevel=logging.INFO,
            include_exc_info=True)
Пример #6
0
    def _validate_and_initialize(self):
        """
        remember, preload_validation() has already been called, so
        no need to repeat anything it has done.

        Load the CXG "group" metadata and cache instance values.
        Be very aware of multiple versions of the CXG object.

        CXG versions in the wild:
        * version 0, aka "no version" -- can be detected by the lack
          of a cxg_group_metadata array.
        * version 0.1 -- metadata attached to cxg_group_metadata array.
          Same as 0, except it adds group metadata.

        Raises DatasetAccessError if the CXG version is unrecognized or
        cxg_group_metadata is not the expected tiledb object type.
        """
        # BUG FIX: title/about (and cxg_version) were previously unbound on
        # some paths -- e.g. a metadata array reporting version "0.0", or an
        # unexpected object type -- causing UnboundLocalError/NameError
        # instead of a clean DatasetAccessError.
        title = None
        about = None
        a_type = tiledb.object_type(path_join(self.url, "cxg_group_metadata"),
                                    ctx=self.tiledb_ctx)
        if a_type is None:
            # version 0 -- no group metadata array present
            cxg_version = "0.0"
        elif a_type == "array":
            # version >0 -- read version and optional properties
            gmd = self.open_array("cxg_group_metadata")
            cxg_version = gmd.meta["cxg_version"]
            if cxg_version == "0.1":
                cxg_properties = json.loads(gmd.meta["cxg_properties"])
                title = cxg_properties.get("title", None)
                about = cxg_properties.get("about", None)
        else:
            # cxg_group_metadata exists but is neither absent nor an array
            raise DatasetAccessError(f"cxg matrix is not valid: {self.url}")

        if cxg_version not in ["0.0", "0.1"]:
            raise DatasetAccessError(f"cxg matrix is not valid: {self.url}")

        self.title = title
        self.about = about
        self.cxg_version = cxg_version
Пример #7
0
def dataset_index(dataset=None):
    config = current_app.app_config
    if dataset is None:
        if config.single_dataset__datapath:
            location = config.single_dataset__datapath
        else:
            return dataroot_index()
    else:
        location = path_join(config.multi_dataset__dataroot, dataset)

    scripts = config.server__scripts

    try:
        cache_manager = current_app.matrix_data_cache_manager
        with cache_manager.data_adaptor(location, config) as data_adaptor:
            dataset_title = config.get_title(data_adaptor)
            return render_template("index.html",
                                   datasetTitle=dataset_title,
                                   SCRIPTS=scripts)
    except DatasetAccessError:
        return common_rest.abort_and_log(HTTPStatus.BAD_REQUEST,
                                         f"Invalid dataset {dataset}",
                                         loglevel=logging.INFO,
                                         include_exc_info=True)
Пример #8
0
 def get_path(self, *urls):
     # Join this object's base url with any number of path components.
     parts = (self.url,) + urls
     return path_join(*parts)
Пример #9
0
 def has_array(self, name):
     # True when `name` exists under this object's url as a tiledb array.
     return tiledb.object_type(path_join(self.url, name), ctx=self.tiledb_ctx) == "array"