def _build_nn(self, graph: Graph, X, y):
    input_x = self._build_input(X)

    def build_model(layer, _, previous_layers):
        # The first layers are applied directly to the input tensor.
        if not previous_layers:
            return layer(input_x)

        # Layers with several predecessors receive the concatenation of their outputs.
        if len(previous_layers) > 1:
            incoming = concatenate(previous_layers)
        else:
            incoming = previous_layers[0]

        return layer(incoming)

    output_y = graph.apply(build_model) or [input_x]
    final_output = self._build_output(output_y, y)

    if "optimizer" not in self._compile_kwargs:
        self._compile_kwargs["optimizer"] = self.optimizer

    self._model = Model(inputs=input_x, outputs=final_output)
    self._model.compile(**self._compile_kwargs)
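
# Illustrative sketch (not part of the original class): the kind of wiring `_build_nn`
# produces with the Keras functional API. Every layer is applied either to the shared input
# tensor (no predecessors) or to the concatenation of its predecessors' outputs. The layer
# sizes, activations, and input shape below are hypothetical; only `concatenate` and `Model`
# mirror the calls used above.
def _example_functional_wiring():
    from tensorflow.keras.layers import Input, Dense, concatenate
    from tensorflow.keras.models import Model

    input_x = Input(shape=(10,))  # stands in for self._build_input(X)

    # Two "first" layers: no previous layers, so each is applied to the input tensor.
    branch_a = Dense(32, activation="relu")(input_x)
    branch_b = Dense(16, activation="relu")(input_x)

    # A layer with more than one predecessor receives the concatenation of their outputs.
    merged = Dense(8, activation="relu")(concatenate([branch_a, branch_b]))

    # Final output layer, standing in for self._build_output(output_y, y).
    output_y = Dense(1, activation="sigmoid")(merged)

    model = Model(inputs=input_x, outputs=output_y)
    model.compile(optimizer="adam", loss="binary_crossentropy")
    return model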
def build_pipeline_graph(
    input: DataType,
    output: DataType,
    registry,
    max_list_depth=3,
    max_pipeline_width=3,
) -> "PipelineBuilder":
    """
    Creates a `PipelineBuilder` instance that generates all pipelines
    from `input` to `output` types.

    ##### Parameters

    - `input`: type descriptor for the desired input.
    - `output`: type descriptor for the desired output.
    - `registry`: list of available classes to build the pipelines.
    """

    # First we unpack the input and output types and
    # store them in actual lists for easier use.
    if isinstance(input, Tuple):
        input_type = list(input.inner)
    else:
        input_type = [input]

    if isinstance(output, Tuple):
        output_type = list(output.inner)
    else:
        output_type = [output]

    logger.info(f"input_type={input_type}")
    logger.info(f"output_type={output_type}")

    # Before starting, let's create all the List[T] wrappers up to
    # `max_list_depth` and add them to `registry`, so that they are available later.
    for algorithm in list(registry):
        for _ in range(max_list_depth):
            algorithm = make_list_wrapper(algorithm)
            registry.append(algorithm)

    # We will also need an index to quickly find out which algorithms
    # accept each input type.
    index = defaultdict(set)

    for algorithm in registry:
        types = _get_annotations(algorithm).input
        types = list(types.inner) if isinstance(types, Tuple) else [types]

        for t in types:
            index[t].add(algorithm)

    logger.info(f"Built algorithm index with {len(index)} entries and {len(registry)} total algorithms.")

    # The graph contains all the algorithms; each algorithm is connected
    # to all those nodes that it can process, i.e., nodes whose output
    # type is a superset of what the algorithm requires.
    G = Graph()

    # For each node stored in the graph, we also store the full list
    # of all inputs and outputs that we can guarantee are available at that point.
    # Initially we add the `Start` node, which produces all of the inputs,
    # and the `End` node, which consumes all the outputs.
    start_node = PipelineStart(input_type)
    end_node = PipelineEnd(output_type)
    G.add_edge(GraphSpace.Start, start_node)
    G.add_edge(end_node, GraphSpace.End)

    # We now apply a BFS. Once a node is processed, all the algorithms to which it could
    # potentially connect are guaranteed to be stored in the graph.
    # Initially the `Start` node is the only open one.
    open_nodes = [start_node]
    closed_nodes = set()

    while open_nodes:
        # This is the next node we need to connect.
        node = open_nodes.pop(0)

        if node in closed_nodes:
            continue

        # When leaving this node we can guarantee that we have the types in this list.
        types = node.output

        logger.info(f"Processing node={node}")

        # We need this helper to check whether all of the input types of an algorithm are
        # guaranteed at this point, i.e., whether they are available in `types`,
        # or at least a conforming type is.
        def type_is_guaranteed(input_type):
            for other_type in types:
                if conforms(other_type, input_type):
                    return True

            return False

        # At this point we have to identify all the algorithms that could continue
        # from here on. These are all the algorithms whose input expects a subset
        # of the types that we already have.
        potential_algorithms = set()

        for t in types:
            potential_algorithms |= index[t]

        for algorithm in potential_algorithms:
            annotations = _get_annotations(algorithm)
            algorithm_input_types = (
                list(annotations.input.inner) if isinstance(annotations.input, Tuple) else [annotations.input]
            )
            algorithm_output_types = (
                list(annotations.output.inner) if isinstance(annotations.output, Tuple) else [annotations.output]
            )
            logger.info(f"Analyzing algorithm={algorithm.__name__} with inputs={algorithm_input_types} and outputs={algorithm_output_types}")

            if any(not type_is_guaranteed(input_type) for input_type in algorithm_input_types):
                logger.info(f"Skipping algorithm={algorithm.__name__}")
                continue

            # At this point we can add the current algorithm to the graph.
            # First, we make the current algorithm "consume" the input types;
            # hence, the output types produced at this point are the output types
            # this algorithm provides plus any input type not consumed so far.
            output_types = sorted(
                set([t for t in types if t not in algorithm_input_types] + algorithm_output_types),
                key=str,
            )

            if len(output_types) > max_pipeline_width:
                continue

            # We add this node to the graph and mark that it consumes the inputs,
            # so that later, when sampling, we can correctly align all the types.
            # When building the node, we can get a `ValueError` if the internal
            # grammar cannot be built; in that case, we simply skip it.
            try:
                new_node = PipelineNode(algorithm=algorithm, input=types, output=output_types)
                G.add_node(new_node)
                G.add_edge(node, new_node)
                open_nodes.append(new_node)
                logger.info(f"Adding node={algorithm.__name__} producing types={output_types}")
            except ValueError as e:
                logger.warning(f"Node={algorithm.__name__} cannot be built. Error={e}.")

        # Let's check if we can add the `End` node.
        if all(type_is_guaranteed(t) for t in output_type):
            G.add_edge(node, end_node)
            logger.info("Connecting to end node")

        closed_nodes.add(node)

    # Once we are done, we check whether the `End` node was ever included in the graph.
    # Otherwise, there is no possible path.
    if GraphSpace.End not in G:
        raise TypeError(
            "No pipelines can be constructed from input:%r to output:%r."
            % (input, output)
        )

    # Now we remove all nodes that don't participate in any path
    # leading to `End`.
    reachable_from_end = set(nx.dfs_preorder_nodes(G.reverse(False), GraphSpace.End))
    unreachable_nodes = set(G.nodes) - reachable_from_end
    G.remove_nodes_from(unreachable_nodes)

    # If the `Start` node was removed, the graph is disconnected and no pipeline exists.
    if GraphSpace.Start not in G:
        raise TypeError(
            "No pipelines can be constructed from input:%r to output:%r."
            % (input, output)
        )

    return PipelineBuilder(G, registry)
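
# Toy illustration (an assumption: not part of the original module) of the type bookkeeping
# used in the BFS above. An algorithm "consumes" its input types, and the guaranteed set at
# the new node becomes its outputs plus every previously guaranteed type it did not consume.
# The type names are hypothetical placeholders, not actual AutoGOAL type descriptors.
def _example_output_type_bookkeeping():
    guaranteed = ["Document", "Sentence"]   # types available at the current node
    algorithm_input_types = ["Sentence"]    # what the candidate algorithm consumes
    algorithm_output_types = ["Embedding"]  # what it produces

    output_types = sorted(
        set([t for t in guaranteed if t not in algorithm_input_types] + algorithm_output_types),
        key=str,
    )
    # "Sentence" was consumed, "Document" carries over, "Embedding" is newly produced.
    # If len(output_types) exceeded max_pipeline_width, the candidate would be dropped.
    assert output_types == ["Document", "Embedding"]
    return output_types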
def build_pipelines(input, output, registry) -> "PipelineBuilder":
    """
    Creates a `PipelineBuilder` instance that generates all pipelines
    from `input` to `output` types.

    ##### Parameters

    - `input`: type descriptor for the desired input.
    - `output`: type descriptor for the desired output.
    - `registry`: list of available classes to build the pipelines.
    """

    # warnings.warn(
    #     "This method is deprecated and not under use by AutoGOAL's"
    #     " internal API anymore, use `build_pipeline_graph` instead.",
    #     category=DeprecationWarning,
    #     stacklevel=2,
    # )

    list_pairs = set()
    types_queue = []

    if isinstance(input, Tuple):
        types_queue.extend(input.inner)
    else:
        types_queue.append(input)

    if isinstance(output, Tuple):
        types_queue.extend(output.inner)
    else:
        types_queue.append(output)

    types_seen = set()

    while types_queue:
        output_type = types_queue.pop(0)

        def build(internal_output, depth):
            if internal_output in types_seen:
                return

            for other_clss in registry:
                annotations = _get_annotations(other_clss)

                if annotations in list_pairs:
                    continue

                other_input = annotations.input
                other_output = annotations.output

                if other_input == other_output:
                    continue

                if not conforms(internal_output, other_input):
                    continue

                other_wrapper = build_composite_list(other_input, other_output, depth)
                list_pairs.add(annotations)
                registry.append(other_wrapper)
                types_queue.append(_get_annotations(other_wrapper).output)

        depth = 0

        while isinstance(output_type, List):
            if output_type.depth() >= MAX_LIST_DEPTH:
                break

            depth += 1
            output_type = output_type.inner

        build(output_type, depth)
        types_seen.add(output_type)

        logger.debug("Output type=%s", output_type)

    list_tuples = set()

    def connect_tuple_wrappers(node, output_type):
        if not isinstance(output_type, Tuple):
            return

        for index in range(0, len(output_type.inner)):
            internal_input = output_type.inner[index]

            for other_clss in registry:
                annotations = _get_annotations(other_clss)
                other_input = annotations.input

                if not (conforms(internal_input, other_input) and other_clss != node):
                    continue

                # `other_clss` has an input compatible with one element in the Tuple,
                # so we build the output `Tuple[..., internal_output, ...]` of the wrapper class.
                internal_output = annotations.output
                output_tuple = list(output_type.inner)
                output_tuple[index] = internal_output
                output_tuple_type = Tuple(*output_tuple)

                # Dynamic class representing the wrapper algorithm.
                if (index, output_type, output_tuple_type) in list_tuples:
                    continue

                other_wrapper = build_composite_tuple(
                    index, output_type, output_tuple_type
                )

                list_tuples.add((index, output_type, output_tuple_type))
                registry.append(other_wrapper)

                open_nodes.append(other_wrapper)
                G.add_edge(node, other_wrapper)

    G = Graph()

    open_nodes = []
    closed_nodes = set()

    # Enqueue open nodes.
    for clss in registry:
        if conforms(input, _get_annotations(clss).input):
            open_nodes.append(clss)
            G.add_edge(GraphSpace.Start, clss)

    connect_tuple_wrappers(GraphSpace.Start, input)

    if GraphSpace.Start not in G:
        raise TypeError("There are no classes compatible with input type:%r." % input)

    while open_nodes:
        clss = open_nodes.pop(0)

        if clss in closed_nodes:
            continue

        closed_nodes.add(clss)
        output_type = _get_annotations(clss).output

        for other_clss in registry:
            other_input = _get_annotations(other_clss).input

            if conforms(output_type, other_input) and other_clss != clss:
                open_nodes.append(other_clss)
                G.add_edge(clss, other_clss)

        connect_tuple_wrappers(clss, output_type)

        if conforms(output_type, output):
            G.add_edge(clss, GraphSpace.End)

    if GraphSpace.End not in G:
        raise TypeError(
            "No pipelines can be constructed from input:%r to output:%r."
            % (input, output)
        )

    # Remove all nodes that don't participate in any path leading to `End`.
    reachable_from_end = set(nx.dfs_preorder_nodes(G.reverse(False), GraphSpace.End))
    unreachable_nodes = set(G.nodes) - reachable_from_end
    G.remove_nodes_from(unreachable_nodes)

    if GraphSpace.Start not in G:
        raise TypeError(
            "No pipelines can be constructed from input:%r to output:%r."
            % (input, output)
        )

    return PipelineBuilder(G, registry)
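
# Minimal sketch (an assumption: standalone example, not part of the original module) of the
# pruning step shared by both builders: every node that cannot reach `End` is removed by
# running a DFS on the reversed graph starting from `End` and discarding whatever it misses.
def _example_prune_unreachable():
    import networkx as nx

    G = nx.DiGraph()
    G.add_edges_from([("Start", "A"), ("A", "End"), ("Start", "B")])  # "B" never reaches "End"

    reachable_from_end = set(nx.dfs_preorder_nodes(G.reverse(False), "End"))
    G.remove_nodes_from(set(G.nodes) - reachable_from_end)

    assert set(G.nodes) == {"Start", "A", "End"}
    return G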
def build_pipeline_graph(
    input_types: List[type],
    output_type: type,
    registry: List[type],
    max_list_depth: int = 3,
) -> PipelineSpace:
    """Build a graph of algorithms.

    Every node in the graph corresponds to a <autogoal.grammar.ContextFreeGrammar> that
    generates an instance of a class with a `run` method.

    Each `run` method must declare input and output types in the form:

        def run(self, a: type_1, b: type_2, ...) -> type_n:
            # ...
    """
    if not isinstance(input_types, (list, tuple)):
        input_types = [input_types]

    # We start by enlarging the registry with all Seq[...] algorithms
    pool = set(registry)

    for algorithm in registry:
        for _ in range(max_list_depth):
            algorithm = make_seq_algorithm(algorithm)
            pool.add(algorithm)

    # For building the graph, we'll keep at each node the guaranteed output types

    # We start by collecting all the possible input nodes,
    # those that can process a subset of the input_types
    open_nodes: List[PipelineNode] = []

    for algorithm in pool:
        if not algorithm.is_compatible_with(input_types):
            continue

        open_nodes.append(
            PipelineNode(
                algorithm=algorithm,
                input_types=input_types,
                output_types=set(input_types) | set([algorithm.output_type()]),
                registry=registry,
            )
        )

    G = Graph()

    for node in open_nodes:
        G.add_edge(GraphSpace.Start, node)

    # We'll make a BFS exploration of the pipeline space.
    # For every open node we will add to the graph every node to which it can connect.
    closed_nodes = set()

    while open_nodes:
        node = open_nodes.pop(0)

        # These are the types that are available at this node
        guaranteed_types = node.output_types

        # Here are all the algorithms that could be added new at this point in the graph
        for algorithm in registry:
            if not algorithm.is_compatible_with(guaranteed_types):
                continue

            # We never want to apply the same exact algorithm twice
            if algorithm == node.algorithm:
                continue

            # And we never want an algorithm that doesn't provide a novel output type...
            if (
                algorithm.output_type() in guaranteed_types
                # ... unless it is an idempotent algorithm
                and [algorithm.output_type()] != algorithm.input_types()
            ):
                continue

            p = PipelineNode(
                algorithm=algorithm,
                input_types=guaranteed_types,
                output_types=guaranteed_types | set([algorithm.output_type()]),
                registry=registry,
            )

            G.add_edge(node, p)

            if p not in closed_nodes and p not in open_nodes:
                open_nodes.append(p)

        # Now we check to see if this node is a possible output
        if issubclass(node.algorithm.output_type(), output_type):
            G.add_edge(node, GraphSpace.End)

        closed_nodes.add(node)

    # Remove all nodes that are not connected to the end node
    try:
        reachable_from_end = set(nx.dfs_preorder_nodes(G.reverse(False), GraphSpace.End))
        unreachable_nodes = set(G.nodes) - reachable_from_end
        G.remove_nodes_from(unreachable_nodes)
    except KeyError:
        raise TypeError("No pipelines can be found!")

    return PipelineSpace(G, input_types=input_types)
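
# Toy illustration (an assumption: standalone sketch with hypothetical stand-in type names,
# not the real AutoGOAL algorithm interface) of the admission rule applied in the BFS above:
# a candidate is skipped when its output type is already guaranteed, unless it is
# "idempotent", i.e., its input types are exactly [output_type], so chaining it may still help.
def _example_admission_rule():
    def admit(output_type, input_types, guaranteed_types):
        # Mirrors the novel-output check in the loop above.
        if output_type in guaranteed_types and [output_type] != input_types:
            return False
        return True

    assert admit("Matrix", ["Text"], {"Text"})                # novel output type: admitted
    assert not admit("Text", ["Matrix"], {"Text", "Matrix"})  # redundant output: skipped
    assert admit("Text", ["Text"], {"Text"})                  # idempotent algorithm: admitted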