Exemplo n.º 1
0
    def _build_nn(self, graph: Graph, X, y):
        """Assemble the model described by `graph`, compile it and keep it.

        The input is created from `X` through `self._build_input`, the
        callables handed over by `graph.apply` are chained on top of it, and
        the result is closed with `self._build_output`. The compiled model
        ends up in `self._model`; nothing is returned.
        """
        input_x = self._build_input(X)

        def wire_layer(layer, _, previous_layers):
            # Callback given to `graph.apply`; presumably it is invoked once
            # per graph node with the results of that node's predecessors
            # (verify against `Graph.apply`).
            if not previous_layers:
                # Nothing upstream: feed the layer straight from the input.
                return layer(input_x)

            # More than one upstream result is merged before being consumed.
            upstream = (
                concatenate(previous_layers)
                if len(previous_layers) > 1
                else previous_layers[0]
            )
            return layer(upstream)

        # A falsy result from `graph.apply` falls back to the bare input.
        graph_outputs = graph.apply(wire_layer) or [input_x]
        model_output = self._build_output(graph_outputs, y)

        # The default optimizer is only used when none was configured.
        if "optimizer" not in self._compile_kwargs:
            self._compile_kwargs["optimizer"] = self.optimizer

        self._model = Model(inputs=input_x, outputs=model_output)
        self._model.compile(**self._compile_kwargs)
Exemplo n.º 2
0
def build_pipeline_graph(input: DataType, output: DataType, registry, max_list_depth=3, max_pipeline_width=3) -> "PipelineBuilder":
    """
    Creates a `PipelineBuilder` instance that generates all pipelines
    from `input` to `output` types.

    ##### Parameters

    - `input`: type descriptor for the desired input.
    - `output`: type descriptor for the desired output.
    - `registry`: list of available classes to build the pipelines.
      It is extended **in place** with the list wrappers created here.
    - `max_list_depth`: how many times each algorithm in `registry` is
      successively wrapped with `make_list_wrapper`.
    - `max_pipeline_width`: maximum number of types allowed to be available
      after a node; algorithms that would exceed it are not added.

    ##### Raises

    - `TypeError`: if no pipeline can be constructed from `input` to `output`.
    """
    # Imported locally so that this function carries its own dependency.
    from collections import deque

    def as_type_list(descriptor):
        # A `Tuple` descriptor is flattened into the list of its inner types,
        # anything else becomes a single-element list.
        if isinstance(descriptor, Tuple):
            return list(descriptor.inner)

        return [descriptor]

    def type_is_guaranteed(required_type, available_types):
        # A required type is guaranteed if it is available in `available_types`,
        # or at least a conforming type is.
        return any(
            conforms(other_type, required_type) for other_type in available_types
        )

    # First we will unpack the input and output type and
    # store them in actual lists for easier use
    input_type = as_type_list(input)
    output_type = as_type_list(output)

    logger.info(f"input_type={input_type}")
    logger.info(f"output_type={output_type}")

    # Before starting, let's create all the List[T] wrappers up to
    # `max_list_depth` and add them to `registry`, so that they are available later.
    # We iterate over a copy because `registry` grows inside the loop.
    for algorithm in list(registry):
        for _ in range(max_list_depth):
            algorithm = make_list_wrapper(algorithm)
            registry.append(algorithm)

    # We will also need an index to quickly find out which algorithms
    # accept each input type
    index = defaultdict(set)

    for algorithm in registry:
        for t in as_type_list(_get_annotations(algorithm).input):
            index[t].add(algorithm)

    logger.info(f"Built algorithm index with {len(index)} entries and {len(registry)} total algorithms.")

    # The graph contains all the algorithms, each algorithm is connected
    # to all those nodes that it can process, which are nodes whose output
    # type is a superset of what the algorithm requires.
    G = Graph()

    # For each node stored in the graph, we will store also the full list
    # of all inputs and outputs that we can guarantee are available at this point.
    # Initially we add the `Start` node, which produces all of the inputs,
    # and the `End` node which consumes all the outputs.
    start_node = PipelineStart(input_type)
    end_node = PipelineEnd(output_type)
    G.add_edge(GraphSpace.Start, start_node)
    G.add_edge(end_node, GraphSpace.End)

    # We will apply a BFS algorithm at this point. We will make sure
    # that once a node is processed, all the algorithms to which it could
    # potentially connect are stored in the graph.
    # Initially the `Start` node is the only one open.
    # A deque gives O(1) pops from the front of the queue.
    open_nodes = deque([start_node])
    closed_nodes = set()

    while open_nodes:
        # This is the next node we will need to connect.
        node = open_nodes.popleft()

        if node in closed_nodes:
            continue

        # When leaving this node we can guarantee that we have the types in this list.
        types = node.output
        logger.info(f"Processing node={node}")

        # In this point we have to identify all the algorithms that could continue
        # from this point on. The index is keyed by exact type, so it only yields
        # candidates; the conformance check below decides which ones really apply.
        potential_algorithms = set()

        for t in types:
            potential_algorithms |= index[t]

        for algorithm in potential_algorithms:
            annotations = _get_annotations(algorithm)
            algorithm_input_types = as_type_list(annotations.input)
            algorithm_output_types = as_type_list(annotations.output)
            logger.info(f"Analyzing algorithm={algorithm.__name__} with inputs={algorithm_input_types} and outputs={algorithm_output_types}")

            if any(not type_is_guaranteed(required, types) for required in algorithm_input_types):
                logger.info(f"Skipping algorithm={algorithm.__name__}")
                continue

            # At this point we can add the current algorithm to the graph.
            # First, we make the current algorithm "consume" the input types,
            # hence, the output types produced at this point are the output types
            # this algorithm provides plus any input type not consumed so far.
            unconsumed_types = [t for t in types if t not in algorithm_input_types]
            output_types = sorted(set(unconsumed_types + algorithm_output_types), key=str)

            if len(output_types) > max_pipeline_width:
                continue

            # We add this node to the graph and we mark that it consumes the inputs,
            # so that later when sampling we can correctly align all the types.
            # When building the node, we can get a `ValueError` if the internal
            # grammar cannot be built; in that case, we simply skip it
            try:
                new_node = PipelineNode(algorithm=algorithm, input=types, output=output_types)
                G.add_node(new_node)
                G.add_edge(node, new_node)
                open_nodes.append(new_node)
                logger.info(f"Adding node={algorithm.__name__} producing types={output_types}")
            except ValueError as e:
                logger.warning(f"Node={algorithm.__name__} cannot be built. Error={e}.")

        # Let's check if we can add the `End` node.
        if all(type_is_guaranteed(t, types) for t in output_type):
            G.add_edge(node, end_node)
            logger.info("Connecting to end node")

        closed_nodes.add(node)

    # NOTE(review): `GraphSpace.End` is added together with `end_node` before the
    # search starts, so this check presumably never fires; the reachability check
    # further below is the one that detects the absence of a path.
    if GraphSpace.End not in G:
        raise TypeError(
            "No pipelines can be constructed from input:%r to output:%r."
            % (input, output)
        )

    # Now we remove all nodes that don't participate in any path
    # leading to `End`
    reachable_from_end = set(nx.dfs_preorder_nodes(G.reverse(False), GraphSpace.End))
    unreachable_nodes = set(G.nodes) - reachable_from_end
    G.remove_nodes_from(unreachable_nodes)

    # If the node `Start` was removed, that means the graph is disconnected.
    if GraphSpace.Start not in G:
        raise TypeError(
            "No pipelines can be constructed from input:%r to output:%r."
            % (input, output)
        )

    return PipelineBuilder(G, registry)
Exemplo n.º 3
0
def build_pipelines(input, output, registry) -> "PipelineBuilder":
    """
    Creates a `PipelineBuilder` instance that generates all pipelines
    from `input` to `output` types.

    ##### Parameters

    - `input`: type descriptor for the desired input.
    - `output`: type descriptor for the desired output.
    - `registry`: list of available classes to build the pipelines.
      It is extended **in place** with the list and tuple wrappers created here.

    ##### Raises

    - `TypeError`: if no class in `registry` accepts `input`, or if no
      pipeline can be constructed from `input` to `output`.
    """
    # Imported locally so that this function carries its own dependency.
    from collections import deque

    # warnings.warn(
    #     "This method is deprecated and not under use by AutoGOAL's"
    #     " internal API anymore, use `build_pipeline_graph` instead.",
    #     category=DeprecationWarning,
    #     stacklevel=2,
    # )

    # ---- Phase 1: create list wrappers for the inner types of `List` descriptors

    list_pairs = set()
    types_queue = deque()

    for descriptor in (input, output):
        if isinstance(descriptor, Tuple):
            types_queue.extend(descriptor.inner)
        else:
            types_queue.append(descriptor)

    types_seen = set()

    def build(internal_output, depth):
        # Registers a composite-list wrapper for every class whose input
        # conforms to `internal_output`, once per annotation pair.
        if internal_output in types_seen:
            return

        # NOTE: `registry` grows while it is being iterated, so wrappers
        # appended below are themselves visited by this same loop.
        for other_clss in registry:
            annotations = _get_annotations(other_clss)

            if annotations in list_pairs:
                continue

            other_input = annotations.input
            other_output = annotations.output

            if other_input == other_output:
                continue

            if not conforms(internal_output, other_input):
                continue

            other_wrapper = build_composite_list(other_input, other_output, depth)
            list_pairs.add(annotations)
            registry.append(other_wrapper)
            types_queue.append(_get_annotations(other_wrapper).output)

    while types_queue:
        output_type = types_queue.popleft()
        depth = 0

        # Peel the `List` layers one by one, up to `MAX_LIST_DEPTH`.
        while isinstance(output_type, List):
            if output_type.depth() >= MAX_LIST_DEPTH:
                break

            depth += 1

            output_type = output_type.inner
            build(output_type, depth)
            types_seen.add(output_type)

            # The value is passed as a lazy logging argument; the message needs a
            # placeholder for it, otherwise formatting fails when DEBUG is enabled.
            logger.debug("Output type %s", output_type)

    # ---- Phase 2: build the graph with a BFS over the registry

    G = Graph()

    # A deque gives O(1) pops from the front of the queue.
    open_nodes = deque()
    closed_nodes = set()

    list_tuples = set()

    def connect_tuple_wrappers(node, output_type):
        # For a `Tuple` output, creates wrappers that apply a compatible class
        # to a single position of the tuple and connects them after `node`.
        if not isinstance(output_type, Tuple):
            return

        for index, internal_input in enumerate(output_type.inner):
            # NOTE: `registry` grows while it is being iterated (see below).
            for other_clss in registry:
                annotations = _get_annotations(other_clss)
                other_input = annotations.input

                if not (conforms(internal_input, other_input) and other_clss != node):
                    continue

                # `other_clss` has input compatible with one element in the Tuple
                # build the output `Tuple[..., internal_output, ...]` of the wrapper class
                internal_output = annotations.output
                output_tuple = list(output_type.inner)
                output_tuple[index] = internal_output
                output_tuple_type = Tuple(*output_tuple)

                # Each (position, input tuple, output tuple) wrapper is built only once
                wrapper_key = (index, output_type, output_tuple_type)

                if wrapper_key in list_tuples:
                    continue

                # dynamic class representing the wrapper algorithm
                other_wrapper = build_composite_tuple(
                    index, output_type, output_tuple_type
                )
                list_tuples.add(wrapper_key)
                registry.append(other_wrapper)

                open_nodes.append(other_wrapper)
                G.add_edge(node, other_wrapper)

    # Enqueue open nodes
    for clss in registry:
        if conforms(input, _get_annotations(clss).input):
            open_nodes.append(clss)
            G.add_edge(GraphSpace.Start, clss)

    connect_tuple_wrappers(GraphSpace.Start, input)

    if GraphSpace.Start not in G:
        raise TypeError("There are no classes compatible with input type:%r." % input)

    while open_nodes:
        clss = open_nodes.popleft()

        if clss in closed_nodes:
            continue

        closed_nodes.add(clss)
        output_type = _get_annotations(clss).output

        # Connect to every other class that accepts this class' output
        for other_clss in registry:
            other_input = _get_annotations(other_clss).input
            if conforms(output_type, other_input) and other_clss != clss:
                open_nodes.append(other_clss)
                G.add_edge(clss, other_clss)

        connect_tuple_wrappers(clss, output_type)

        if conforms(output_type, output):
            G.add_edge(clss, GraphSpace.End)

    if GraphSpace.End not in G:
        raise TypeError(
            "No pipelines can be constructed from input:%r to output:%r."
            % (input, output)
        )

    # Remove all nodes that don't participate in any path leading to `End`
    reachable_from_end = set(nx.dfs_preorder_nodes(G.reverse(False), GraphSpace.End))
    unreachable_nodes = set(G.nodes) - reachable_from_end
    G.remove_nodes_from(unreachable_nodes)

    # If `Start` was removed, there is no path from `Start` to `End`
    if GraphSpace.Start not in G:
        raise TypeError(
            "No pipelines can be constructed from input:%r to output:%r."
            % (input, output)
        )

    return PipelineBuilder(G, registry)
Exemplo n.º 4
0
def build_pipeline_graph(
    input_types: List[type],
    output_type: type,
    registry: List[type],
    max_list_depth: int = 3,
) -> PipelineSpace:
    """Build a graph of algorithms.

    Every node in the graph corresponds to a <autogoal.grammar.ContextFreeGrammar> that
    generates an instance of a class with a `run` method.

    Each `run` method must declare input and output types in the form:

        def run(self, a: type_1, b: type_2, ...) -> type_n:
            # ...

    Parameters:

    - `input_types`: the types available at the start of every pipeline;
      a single type is also accepted and wrapped into a list.
    - `output_type`: a node can end a pipeline when its algorithm's output
      type is a subclass of this type.
    - `registry`: the algorithms that can be used to build pipelines.
    - `max_list_depth`: how many times each algorithm is successively
      wrapped with `make_seq_algorithm`.

    Returns a `PipelineSpace` built from the pruned graph and `input_types`.

    Raises `TypeError` if no node could be connected to the end of the graph.
    """
    # Imported locally so that this function carries its own dependency.
    from collections import deque

    if not isinstance(input_types, (list, tuple)):
        input_types = [input_types]

    # We start by enlarging the registry with all Seq[...] algorithms

    pool = set(registry)

    for algorithm in registry:
        for _ in range(max_list_depth):
            algorithm = make_seq_algorithm(algorithm)
            pool.add(algorithm)

    # For building the graph, we'll keep at each node the guaranteed output types

    # We start by collecting all the possible input nodes,
    # those that can process a subset of the input_types.
    # A deque gives O(1) pops from the front during the BFS below.
    open_nodes = deque()

    for algorithm in pool:
        if not algorithm.is_compatible_with(input_types):
            continue

        open_nodes.append(
            PipelineNode(
                algorithm=algorithm,
                input_types=input_types,
                output_types=set(input_types) | {algorithm.output_type()},
                registry=registry,
            ))

    G = Graph()

    for node in open_nodes:
        G.add_edge(GraphSpace.Start, node)

    # We'll make a BFS exploration of the pipeline space.
    # For every open node we will add to the graph every node to which it can connect.
    closed_nodes = set()

    while open_nodes:
        node = open_nodes.popleft()

        # These are the types that are available at this node
        guaranteed_types = node.output_types

        # Here are all the algorithms that could be added new at this point in the graph
        # NOTE(review): this iterates `registry`, not the enlarged `pool`, so the
        # Seq[...] algorithms are only considered as entry nodes -- confirm that
        # this is intended.
        for algorithm in registry:
            if not algorithm.is_compatible_with(guaranteed_types):
                continue

            # We never want to apply the same exact algorithm twice
            if algorithm == node.algorithm:
                continue

            produced_type = algorithm.output_type()

            # And we never want an algorithm that doesn't provide a novel output type...
            if (produced_type in guaranteed_types and
                    # ... unless it is an idempotent algorithm
                    [produced_type] != algorithm.input_types()):
                continue

            p = PipelineNode(
                algorithm=algorithm,
                input_types=guaranteed_types,
                output_types=guaranteed_types | {produced_type},
                registry=registry,
            )

            G.add_edge(node, p)

            if p not in closed_nodes and p not in open_nodes:
                open_nodes.append(p)

        # Now we check to see if this node is a possible output
        if issubclass(node.algorithm.output_type(), output_type):
            G.add_edge(node, GraphSpace.End)

        closed_nodes.add(node)

    # If no node was ever connected to `End`, there is nothing to prune and no
    # pipeline exists. Checking membership explicitly avoids relying on which
    # exception the graph traversal raises for a missing source node.
    if GraphSpace.End not in G:
        raise TypeError("No pipelines can be found!")

    # Remove all nodes that are not connected to the end node
    reachable_from_end = set(
        nx.dfs_preorder_nodes(G.reverse(False), GraphSpace.End))
    unreachable_nodes = set(G.nodes) - reachable_from_end
    G.remove_nodes_from(unreachable_nodes)

    return PipelineSpace(G, input_types=input_types)