Example #1
0
    def get_strengthening_constraint(self, input_graph: Graph) -> Graph:
        """Combine the strengthening constraints of all child constraints.

        Each constraint in ``self.constraints`` contributes its own
        strengthening constraint for ``input_graph``; the contributions are
        merged into a single fresh graph, which is returned.
        """
        combined = Graph()
        for child in self.constraints.values():
            combined.merge(child.get_strengthening_constraint(input_graph))
        return combined
Example #2
0
    def _get_canonical_query_plans(self,
                                   sequence: List[str],
                                   transformation: Transformation) -> Dict[Skeleton, Set[QueryPlan]]:
        """Expand the meta-plan for `transformation` into concrete query plans.

        For the given component `sequence`, enumerates the blueprint item
        lists recorded in the meta-plan, assembles one QueryPlan per list,
        and groups the plans by every Skeleton (component sequence plus
        argument wiring) that the plan can serve.

        Args:
            sequence: Component names, one per depth of the plan.
            transformation: The overall transformation being decomposed.

        Returns:
            Mapping from each feasible Skeleton to the set of QueryPlans
            that realize it.
        """

        meta_plan = self._meta_plans[transformation]
        blueprint_item_lists = self._get_blueprint_item_lists(sequence,
                                                              meta_plan,
                                                              _d=len(sequence))
        #  The canonical transformation for this length is mapped onto the
        #  actual transformation; only the first subgraph mapping is used.
        canonical_transformation = meta_plan.canonical_transformations[len(sequence)]
        mapping = next(canonical_transformation.get_subgraph_mappings(transformation))

        skeletons_to_plans: Dict[Skeleton, Set[QueryPlan]] = collections.defaultdict(set)

        for blueprint_item_list in blueprint_item_lists:
            #  Breakdown the overall transformation in terms of the unit plans contained in the blueprint items.
            #  Store the connections between them as a graph mapping.
            connections = GraphMapping()
            connections.update(mapping)
            graph = Graph()
            for item in blueprint_item_list:
                graph.merge(item.unit.transformation)
                #  Rewrite the keys of `connections` through this item's
                #  canonical mapping; order matters across iterations.
                connections = connections.apply_mapping(item.canonical_mapping, only_keys=True)

                if item.border_mapping:
                    connections.update(item.border_mapping)
                    connections = connections.apply_mapping(connections, only_values=True)

            #  Assemble the query plan
            query_plan = QueryPlan(transformation,
                                   units=[item.unit.transformation for item in blueprint_item_list],
                                   all_connections=connections,
                                   strengthenings=[item.unit.strengthenings[component_name]
                                                   for component_name, item in zip(sequence, blueprint_item_list)])

            #  Obtain the skeletons for which this query plan would work.
            #  External inputs are negative integers. See gauss.synthesis.skeleton for details.
            ent_to_idx = {ent: -idx for idx, ent in enumerate(transformation.get_input_entities(), 1)}
            possible_arg_ints_lists = []
            for component_name, (idx, item) in zip(sequence, enumerate(blueprint_item_list, 1)):
                #  Get the mapped entities to the inputs of this unit's transformation, and look up their idx values.
                arg_ints = [ent_to_idx[connections.m_ent[ent]] for ent in item.unit.transformation.get_input_entities()]

                #  Get all the permutations as well.
                arg_ints_list = [arg_num_mapping.apply_list(arg_ints)
                                 for arg_num_mapping in item.unit.component_entries[component_name].argument_mappings]

                possible_arg_ints_lists.append(arg_ints_list)
                #  The output of this unit becomes referable at positive index
                #  `idx` for later units in the same blueprint item list.
                ent_to_idx[item.unit.transformation.get_output_entity()] = idx

            #  The skeletons are then simply the all the combinations
            for arg_ints_list in itertools.product(*possible_arg_ints_lists):
                skeleton = Skeleton(list(zip(sequence, arg_ints_list)))
                skeletons_to_plans[skeleton].add(query_plan)

        return skeletons_to_plans
Example #3
0
    def prepare_solution(self, output: Any, output_graph: Graph) -> Solution:
        """Assemble the final Solution for the synthesized program.

        Builds the mapping from integer ids to display names (negative ids
        are external inputs, the positive id `skeleton.length` is the final
        output), merges the per-depth graphs, closes them transitively over
        the intermediate-output nodes, and delegates the actual Solution
        construction to the domain.
        """
        input_names = self.problem.input_names
        if input_names is not None:
            int_to_names: Dict[int, str] = {
                -i: name for i, name in enumerate(input_names, 1)
            }
        else:
            #  No names supplied; fall back to inp1, inp2, ...
            int_to_names: Dict[int, str] = {
                -i: f"inp{i}" for i in range(1, len(self.problem.inputs) + 1)
            }

        int_to_names[self.skeleton.length] = self.problem.output_name

        merged = Graph()
        for depth_graph in self.graphs:
            merged.merge(depth_graph)

        #  Perform transitive closure w.r.t the nodes corresponding to the
        #  intermediate outputs, then drop those nodes so only the external
        #  inputs and the final output remain in the intent graph.
        if self.skeleton.length > 1:
            intermediate_nodes = set.union(
                *(set(self.int_to_graph[i].iter_nodes())
                  for i in range(1, self.skeleton.length)))
            self.domain.perform_transitive_closure(merged,
                                                   join_nodes=intermediate_nodes)
            merged = merged.induced_subgraph(
                keep_nodes=set(merged.iter_nodes()) - intermediate_nodes)

        return self.domain.prepare_solution(
            self.problem.inputs,
            output,
            merged,
            self.problem.graph_inputs,
            output_graph,
            self.enumeration_items,
            arguments=[arg_ints for (comp_name, arg_ints) in self.skeleton],
            int_to_names=int_to_names,
            int_to_obj=self.int_to_val)
Example #4
0
    def _solve_for_skeleton_recursive(
            self,
            problem: SynthesisProblem,
            skeleton: Skeleton,
            query_plans: QueryPlans,
            context: SolverContext,
            _depth: int = 0) -> Iterator[Tuple[Any, Graph]]:
        """Depth-first search over candidate components for one skeleton.

        At depth `_depth`, enumerates candidate outputs for the component at
        that position of `skeleton`, filters them against the query plans via
        `context.check_validity`, records each surviving candidate in
        `context`, and either yields the final (output, graph) pair (at the
        last depth) or recurses into the next depth.

        Args:
            problem: The synthesis problem (supplies constants and timeout).
            skeleton: The fixed (component, argument-ints) sequence to fill.
            query_plans: Query plans for this skeleton (passed through to
                recursive calls).
            context: Shared mutable search state; stepped at each depth.
            _depth: Current recursion depth (0-based, internal).

        Yields:
            Tuples of the final output value and its output graph.

        Raises:
            TimeoutError: If `problem.timeout` is exceeded.
        """

        domain = self._domain
        component_name, arg_ints = skeleton[_depth]
        inputs, g_inputs = context.get_arguments(depth=_depth)
        #  Each input graph is expected to contain (at least) one entity;
        #  only the first is used here.
        inp_entities = [
            next(iter(g_inp.iter_entities())) for g_inp in g_inputs
        ]
        inp_graph = Graph()
        for g_inp in g_inputs:
            inp_graph.merge(g_inp)

        #  Get the strengthening constraint for this depth.
        #  Specifically, for every query, get the intersection of the strengthenings of all the query plans for that
        #  query at this particular depth. Then take the union of all of these.
        #  In other words, this strengthening constraint is a graph containing the nodes, edges, tags and tagged edges
        #  that must be satisfied by the graph containing the inputs, that is `inp_graph` in this context.
        #  This constraint can then be used by the `enumerate` procedure to speed up the search.
        strengthening_constraint: Graph = context.waypoints[
            _depth].get_strengthening_constraint(inp_graph)
        enumeration_item: EnumerationItem
        for enumeration_item in domain.enumerate(
                component_name=component_name,
                inputs=inputs,
                g_inputs=g_inputs,
                constants=problem.constants,
                strengthening_constraint=strengthening_constraint):

            output = enumeration_item.output
            c_graph = enumeration_item.graph
            o_graph = enumeration_item.o_graph

            # for g in g_inputs:
            #     assert set(g.iter_nodes()).issubset(set(c_graph.iter_nodes()))

            #  Enforce the overall time budget inside the (potentially large)
            #  enumeration loop.
            if problem.timeout is not None and time.time(
            ) - self._time_start > problem.timeout:
                raise TimeoutError("Exceeded time limit.")

            #  Wrap the candidate graph as a Transformation from the input
            #  entities to the (placeholder) output entity.
            out_entity = next(iter(o_graph.iter_entities()))
            c_graph.add_node(PlaceholderNode(entity=out_entity))
            c_graph = Transformation.build_from_graph(
                c_graph, input_entities=inp_entities, output_entity=out_entity)

            #  Check if the returned graph is consistent with the query plans.
            if not context.check_validity(c_graph, depth=_depth):
                continue

            #  Prepare for the next round.
            context.step(output=output,
                         graph=c_graph,
                         output_graph=o_graph,
                         enumeration_item=enumeration_item,
                         depth=_depth)

            if _depth == skeleton.length - 1:
                #  This was the last component, prepare the program and return it along with the final output and graph.
                yield output, o_graph

            else:
                #  Move on to the next component.
                yield from self._solve_for_skeleton_recursive(problem,
                                                              skeleton,
                                                              query_plans,
                                                              context,
                                                              _depth=_depth +
                                                              1)
    def init(self):
        """Replay the benchmark's recorded program and build its intent graph.

        Converts the benchmark inputs to graphs, replays each skeleton step
        through the domain's `enumerate` (driven by `self.replay_map`) to
        recover the per-step outputs, programs and graphs, merges the graphs,
        checks the final output against the benchmark's expected output, and
        finally assembles both the intent graph (`self._graph`) and the
        ground-truth program string (`self.program`).
        """
        domain = PandasLiteSynthesisDomain()
        #  Each replay entry is consumed as an iterator, one element per
        #  corresponding `enumerate` call below.
        replay = {k: iter(v) for k, v in self.replay_map.items()}
        graph = Graph()

        g_inputs = self._g_inputs = [
            self._convert_inp_to_graph(inp) for inp in self.inputs
        ]
        #  Negative ids denote external inputs; positive ids (assigned in the
        #  loop below) denote intermediate outputs.
        int_to_val = {-idx: inp for idx, inp in enumerate(self.inputs, 1)}
        int_to_graph = {-idx: g_inp for idx, g_inp in enumerate(g_inputs, 1)}

        #  Run the generators to extract the programs and graphs for each component call.
        #  Merge the individual graphs into the master graph.
        call_strs: List[str] = []
        for idx, (component_name, arg_ints) in enumerate(self.skeleton, 1):
            c_inputs = [int_to_val[i] for i in arg_ints]
            g_c_inputs = [int_to_graph[i] for i in arg_ints]
            #  Only the first enumerated candidate is taken; with `replay` it
            #  reproduces the recorded call.
            output, program, c_graph, output_graph = next(
                domain.enumerate(component_name,
                                 c_inputs,
                                 g_c_inputs,
                                 replay=replay))
            int_to_val[idx] = output
            int_to_graph[idx] = output_graph
            call_strs.append(program)
            graph.merge(c_graph)

        #  Check that the final output is equivalent to the original output specified in the benchmark.
        assert domain.check_equivalent(self.output, int_to_val[self.skeleton.length]), \
            f"Generated output inconsistent with specified output in Pandas benchmark {self.b_id}"

        #  Retrofit the value of the output entity to the original output
        #  (identity comparison: the entity holds the exact replayed object).
        cur_out_entity = next(ent for ent in graph.iter_entities()
                              if ent.value is int_to_val[self.skeleton.length])
        cur_out_entity.value = self.output

        #  Perform transitive closure w.r.t the nodes corresponding to the intermediate outputs
        #  and take the induced subgraph containing all nodes except those
        if self.skeleton.length > 1:
            join_nodes = set.union(*(set(int_to_graph[i].iter_nodes())
                                     for i in range(1, self.skeleton.length)))
            domain.perform_transitive_closure(graph, join_nodes=join_nodes)
            intent_graph = graph.induced_subgraph(
                keep_nodes=set(graph.iter_nodes()) - join_nodes)
        else:
            intent_graph = graph

        self._graph = intent_graph

        #  Also construct the string representation of the ground-truth program.
        program_list: List[str] = []
        for depth, (call_str,
                    (component_name,
                     arg_ints)) in enumerate(zip(call_strs, self.skeleton), 1):
            #  Negative ids render as inpK, positive ids as vK.
            arg_strs = [f"inp{-i}" if i < 0 else f"v{i}" for i in arg_ints]
            call_str = call_str.format(**{
                f"inp{idx}": arg_str
                for idx, arg_str in enumerate(arg_strs, 1)
            })
            if depth == self.skeleton.length:
                program_list.append(call_str)
            else:
                program_list.append(f"v{depth} = {call_str}")

        self.program = "\n".join(program_list)