Exemplo n.º 1
0
    def _add_data_sample(self, key, spec, spec_options=None):
        """Store a data sample locally and reference it from its datasets.

        Args:
            key: Key under which the data sample is saved and registered.
            spec: Data sample specification; this method reads its
                ``data_manager_keys``, ``path`` and ``test_only`` attributes.
            spec_options: Accepted for interface compatibility; it is not
                used by this method.

        Raises:
            exceptions.InvalidRequest: If ``spec.data_manager_keys`` is empty.

        Returns:
            The data sample object returned by ``self._db.add``.
        """
        if len(spec.data_manager_keys) < 1:
            raise exceptions.InvalidRequest(
                "Please add at least one data manager for the data sample",
                400)

        # Look up every dataset first, before any file is copied or any
        # record is added.
        linked_datasets = []
        for manager_key in spec.data_manager_keys:
            linked_datasets.append(
                self._db.get(schemas.Type.Dataset, manager_key))

        stored_path = self._db.save_file(spec.path, key)
        registered = self._db.add(
            models.DataSample(
                key=key,
                owner=_BACKEND_ID,
                path=stored_path,
                data_manager_keys=spec.data_manager_keys,
                test_only=spec.test_only,
            )
        )

        # Reference the new sample from the train or test list of each
        # dataset, without creating duplicates.
        for dataset in linked_datasets:
            target_keys = (
                dataset.test_data_sample_keys
                if spec.test_only
                else dataset.train_data_sample_keys
            )
            if registered.key in target_keys:
                continue
            target_keys.append(registered.key)

        return registered
Exemplo n.º 2
0
def compute_ranks(
        node_graph: typing.Dict[str, typing.List[str]],
        node_to_ignore: typing.Optional[typing.Set[str]] = None,
        ranks: typing.Optional[typing.Dict[str, int]] = None) -> typing.Dict[str, int]:
    """Compute the ranks of the nodes in the graph.

    Nodes whose dependencies are all ignored (or that have none) get rank 0.
    Nodes are then visited by increasing rank and each child is given a rank
    at least one above the rank of the node it depends on.

    Args:
        node_graph (typing.Dict[str, typing.List[str]]):
            Dict {node_id: list of nodes it depends on}.
            Node graph keys must not contain any node to ignore.
        node_to_ignore (typing.Set[str], optional): Set of nodes to ignore.
            Defaults to None.
        ranks (typing.Dict[str, int], optional): Already computed ranks.
            Defaults to None. A non-empty dict is updated in place and
            returned; an empty dict or None is replaced by a new dict.

    Raises:
        ValueError: If a key of the node graph is also a node to ignore.
        exceptions.InvalidRequest: If the node graph contains a cycle

    Returns:
        typing.Dict[str, int]: Dict { node_id : rank }
    """
    ranks = ranks or dict()
    visited = set()
    node_to_ignore = node_to_ignore or set()

    extra_nodes = set(node_graph).intersection(node_to_ignore)
    if extra_nodes:
        raise ValueError(
            f"node_graph keys should not contain any node to ignore: {extra_nodes}"
        )

    inverted_node_graph = _get_inverted_node_graph(node_graph, node_to_ignore)

    # Assign rank 0 to nodes without deps. The check above guarantees that no
    # key of node_graph is an ignored node, so only dependencies are filtered.
    for node, dependencies in node_graph.items():
        if all(dep in node_to_ignore for dep in dependencies):
            ranks[node] = 0

    edges = set()

    while len(visited) != len(node_graph):
        # Raises InvalidRequest when no ranked, unvisited node is left.
        current_node = _get_current_node(visited, ranks)
        visited.add(current_node)
        for child in inverted_node_graph.get(current_node, list()):
            ranks[child] = max(ranks[current_node] + 1, ranks.get(child, -1))

            # Cycle detection: the same edge seen in both directions means
            # the two nodes depend on each other.
            edge = (current_node, child)
            if (edge[1], edge[0]) in edges:
                # Adjacent literals are used instead of a backslash
                # continuation inside the string, which would embed the
                # source indentation in the message.
                raise exceptions.InvalidRequest(
                    "missing dependency among inModels IDs, "
                    f"circular dependency between {edge[0]} and {edge[1]}",
                    400)
            edges.add(edge)

    return ranks
Exemplo n.º 3
0
def _uncompress(archive, to_):
    """Extract a tar or zip archive into the given destination.

    Args:
        archive: Path of the archive to extract.
        to_: Destination passed through to the extraction helper.

    Raises:
        exceptions.InvalidRequest: If the archive is recognised neither as a
            tar file nor as a zip file.
    """
    # The tar format is probed first, then zip.
    if tarfile.is_tarfile(archive):
        _untar(archive, to_)
        return

    if zipfile.is_zipfile(archive):
        _unzip(archive, to_)
        return

    raise exceptions.InvalidRequest(f"Cannot uncompress '{archive}'", 400)
Exemplo n.º 4
0
    def _check_metadata(metadata: typing.Optional[typing.Dict[str, str]]):
        """Validate the metadata and return the owner to use.

        Args:
            metadata: Optional mapping of metadata keys to values.

        Raises:
            exceptions.InvalidRequest: If a key is longer than
                ``_MAX_LEN_KEY_METADATA``, or if a value is empty or longer
                than ``_MAX_LEN_VALUE_METADATA``.

        Returns:
            ``metadata[DEBUG_OWNER]`` when that key is present, otherwise
            ``_BACKEND_ID``.
        """
        # Without metadata there is nothing to validate and no owner override.
        if metadata is None:
            return _BACKEND_ID

        # NOTE(review): the messages below hard-code 50 and 100 while the
        # limits come from module constants - confirm they agree.
        key_sizes = [len(name) for name in metadata]
        if any(size > _MAX_LEN_KEY_METADATA for size in key_sizes):
            raise exceptions.InvalidRequest(
                "The key in metadata cannot be more than 50 characters",
                400)

        value_sizes = [len(content) for content in metadata.values()]
        if any(
                size > _MAX_LEN_VALUE_METADATA or size == 0
                for size in value_sizes
        ):
            raise exceptions.InvalidRequest(
                "Values in metadata cannot be empty or more than 100 characters",
                400)

        # In debug mode, the user can define the owner of the data
        return metadata[DEBUG_OWNER] if DEBUG_OWNER in metadata else _BACKEND_ID
Exemplo n.º 5
0
def _get_current_node(visited, ranks):
    """Return the next node to visit: the unvisited node with the lowest rank.

    Args:
        visited: Collection of the nodes already visited.
        ranks: Dict {node_id: rank} of the ranks known so far.

    Raises:
        exceptions.InvalidRequest: If every node of ``ranks`` has already
            been visited (or ``ranks`` is empty).

    Returns:
        The first node, in ``ranks`` iteration order, holding the smallest
        rank among the nodes that are not in ``visited``.
    """
    pending = (
        (node, rank) for node, rank in ranks.items() if node not in visited
    )
    # min() keeps the first of several equal minima, like a strict "<" scan.
    next_node, _ = min(
        pending, key=lambda entry: entry[1], default=(None, None))

    # Failure means that there is a closed cycle: A -> B -> ... -> A
    if next_node is None:
        raise exceptions.InvalidRequest(
            "missing dependency among inModels IDs", 400)
    return next_node
Exemplo n.º 6
0
    def save_file(self, file_path: typing.Union[str, pathlib.Path], key: str):
        """Copy a file or directory into the local temp dir to mimic
        the remote backend that saves the files given by the user.

        Args:
            file_path: Path of the file or directory to copy.
            key: Asset key; the copy is stored in ``self.tmp_dir / key``.

        Raises:
            exceptions.AlreadyExists: If an entry with the same name already
                exists in the directory of this asset.
            exceptions.InvalidRequest: If ``file_path`` is neither an existing
                file nor an existing directory.

        Returns:
            Path of the copy, i.e. ``self.tmp_dir / key / <name of file_path>``.
        """
        source = pathlib.Path(file_path)
        tmp_directory = self.tmp_dir / key
        tmp_file = tmp_directory / source.name

        # Create the asset directory together with any missing parent, and
        # tolerate it already existing, instead of a separate exists() check
        # followed by a non-recursive mkdir.
        tmp_directory.mkdir(parents=True, exist_ok=True)

        if tmp_file.exists():
            raise exceptions.AlreadyExists(
                f"File {tmp_file.name} already exists for asset {key}", 409)

        if source.is_file():
            shutil.copyfile(source, tmp_file)
        elif source.is_dir():
            shutil.copytree(source, tmp_file)
        else:
            raise exceptions.InvalidRequest(f"Could not copy {file_path}", 400)
        return tmp_file
Exemplo n.º 7
0
    def _download_model_from_tuple(self, tuple_type, tuple_key, folder, head_trunk=None) -> None:
        """Download the out-model of a tuple into a folder.

        Args:
            tuple_type: Type of the tuple, passed to ``self._backend.get``.
            tuple_key: Key of the tuple to fetch.
            folder: Destination passed to ``self.download_model``.
            head_trunk: For composite traintuples, selects the model to
                download: ``"head"`` or ``"trunk"``. Not used to select the
                model for other tuple types. Defaults to None.

        Raises:
            exceptions.InvalidRequest: If the tuple is a composite traintuple
                and ``head_trunk`` is neither ``"head"`` nor ``"trunk"``.
            exceptions.NotFound: If the tuple has no such out-model.
        """
        # Named tuple_ so the builtin ``tuple`` is not shadowed.
        tuple_ = self._backend.get(tuple_type, tuple_key)

        if tuple_type == schemas.Type.CompositeTraintuple:
            if head_trunk == "head":
                model = tuple_.out_head_model.out_model
            elif head_trunk == "trunk":
                model = tuple_.out_trunk_model.out_model
            else:
                # Pass the 400 status code like the other InvalidRequest
                # call sites do.
                raise exceptions.InvalidRequest(
                    'head_trunk parameter must have value "head" or "trunk"',
                    400)
        else:
            model = tuple_.out_model

        if not model:
            desc = f'{head_trunk} ' if head_trunk else ""
            msg = f'{tuple_type} {tuple_key}, status "{tuple_.status}" has no {desc}out-model'
            raise exceptions.NotFound(msg, 404)

        self.download_model(model.key, folder)