Exemplo n.º 1
0
 def paths_between(self, a: DRS, b: DRS, primitives, max_hops=2) -> DRS:
     """
     Look for transitive paths linking elements of ``a`` to elements of ``b``.

     Every (element of a, element of b) pair is handed to the network path
     finder that matches the mode of the inputs, restricted to the given
     primitive, and the results are absorbed into a single output DRS that
     also carries the provenance of both inputs.

     :param a: DRS holding the source elements
     :param b: DRS holding the target elements; asserted to share a's mode
     :param primitives: primitive (singular for now) the paths must follow
     :param max_hops: forwarded to the network path finder as ``max_hops``
     :return: DRS accumulating the results of the path searches
     """
     assert(a.mode == b.mode)
     result = DRS([], Operation(OP.NONE))
     for origin in (a, b):
         result.absorb_provenance(origin)

     # pick the network operation that corresponds to the mode of the inputs
     if a.mode == DRSMode.FIELDS:
         def find_path(src, dst):
             return self.__network.find_path_hit(
                 src, dst, primitives, max_hops=max_hops)
     elif a.mode == DRSMode.TABLE:
         def find_path(src, dst):
             return self.__network.find_path_table(
                 src, dst, primitives, self, max_hops=max_hops)
     else:
         # any other mode: nothing is searched
         return result

     for src in a:
         for dst in b:
             if src == dst:
                 # NOTE(review): an equal source/target pair ends the whole
                 # search with whatever was gathered so far instead of just
                 # being skipped -- confirm this is intended
                 return result
             found = find_path(src, dst)
             result = result.absorb(found)
     return result
Exemplo n.º 2
0
    def __traverse(self, a: DRS, primitive, max_hops=2) -> DRS:
        """
        Expand outwards from an initial DRS, round by round, gathering the
        neighbors that are reachable through a primitive.

        In every round the neighbors of each element of the current fringe
        are unioned into the output; the output gathered so far then becomes
        the fringe of the next round.

        :param a: a nid, node, tuple, or DRS
        :param primitive: the primitive used to look up neighbors
        :param max_hops: maximum number of rounds on the graph
        :return: DRS with the union of the neighbors found
        :raises ValueError: if the input is in DRSMode.TABLE
        """
        start = self._general_to_drs(a)

        gathered = DRS([], Operation(OP.NONE))

        if start.mode == DRSMode.TABLE:
            raise ValueError('input mode DRSMode.TABLE not supported')

        gathered.absorb_provenance(start)
        frontier = start
        rounds_left = max_hops
        while rounds_left > 0:
            rounds_left -= 1
            for hit in frontier:
                neighbors = self._network.neighbors_id(hit, primitive)
                gathered = self.union(gathered, neighbors)
            # the next round starts from everything gathered so far
            frontier = gathered
        return gathered
Exemplo n.º 3
0
 def traverse(self, a: DRS, primitives, max_hops) -> DRS:
     """
     Gather the neighbors reachable from a DRS through a primitive, expanding
     for a number of rounds.

     In every round the neighbors of each element of the fringe are unioned
     into the output; a snapshot of the output then becomes the next fringe.

     :param a: the input DRS; TABLE mode is not supported
     :param primitives: the primitive used to look up neighbors
     :param max_hops: number of expansion rounds
     :return: DRS with the union of the neighbors found. For an input in
         TABLE mode an error line is printed and an empty list is returned
         instead (which does not match the annotated return type).
     """
     gathered = DRS([], Operation(OP.NONE))
     if a.mode == DRSMode.TABLE:
         print("ERROR: input mode TABLE not supported")
         return []
     frontier = [hit for hit in a]
     gathered.absorb_provenance(a)
     rounds_left = max_hops
     while rounds_left > 0:
         rounds_left -= 1
         for hit in frontier:
             neighbors = self.__network.neighbors_id(hit, primitives)
             gathered = self.union(gathered, neighbors)
         # snapshot of everything gathered so far seeds the next round
         frontier = [hit for hit in gathered]
     return gathered
Exemplo n.º 4
0
    def paths(self,
              drs_a: DRS,
              drs_b: DRS = None,
              relation=Relation.PKFK,
              max_hops=2) -> DRS:
        """
        Is there a transitive relationship between any element in a with any
        element in b?
        This function finds the answer constrained on the relation
        (singular for now) that is passed as a parameter.
        If b is not passed, assumes the user is searching for paths between
        elements in a.
        :param drs_a: source input, converted with ``_general_to_drs``
        :param drs_b: target input, converted with ``_general_to_drs``;
            when omitted (None) the converted ``drs_a`` is used as target too
        :param relation: relation the paths must follow (default PKFK)
        :param max_hops: forwarded to the network path finder as ``max_hops``
        :return: DRS absorbing the results found for every (a, b) pair
        """
        drs_a = self._general_to_drs(drs_a)
        # create b if it wasn't passed in: paths are then searched among the
        # elements of a, as the docstring promises
        if drs_b is None:
            drs_b = drs_a
        else:
            drs_b = self._general_to_drs(drs_b)

        self._assert_same_mode(drs_a, drs_b)

        # absorb the provenance of both a and b (only once when they are equal)
        o_drs = DRS([], Operation(OP.NONE))
        o_drs.absorb_provenance(drs_a)
        if drs_b != drs_a:
            o_drs.absorb_provenance(drs_b)

        for h1, h2 in itertools.product(drs_a, drs_b):

            # there are different network operations for table and field mode
            if drs_a.mode == DRSMode.FIELDS:
                res_drs = self._network.find_path_hit(h1,
                                                      h2,
                                                      relation,
                                                      max_hops=max_hops)
            else:
                res_drs = self._network.find_path_table(h1,
                                                        h2,
                                                        relation,
                                                        self,
                                                        max_hops=max_hops)

            o_drs = o_drs.absorb(res_drs)

        return o_drs
Exemplo n.º 5
0
    def __neighbor_search(self, input_data, relation: Relation):
        """
        Find the neighbors of the given input under a relation.

        The input is first converted to a DRS. Relations that do not come
        from metadata are resolved through the network directly; relations
        that do are first translated to a metadata relation and searched
        with ``md_search`` before asking the network.

        :param input_data: nid, node tuple, Hit, or DRS
        :param relation: the relation neighbors must satisfy
        :return: DRS absorbing the neighbors found for every input element,
            carrying the provenance of the converted input
        """
        # normalise the input and prepare the output with its provenance
        source_drs = self._general_to_drs(input_data)
        result = DRS([], Operation(OP.NONE)).absorb_provenance(source_drs)

        # get all of the table Hits in a DRS, if necessary.
        if source_drs.mode == DRSMode.TABLE:
            # NOTE(review): the return value is discarded, so this only has
            # an effect if the helper updates the DRS in place -- confirm
            self._general_to_field_drs(source_drs)

        if relation.from_metadata():
            # metadata-backed relation: look the neighbors up first
            md_relation = self._relation_to_mdrelation(relation)
            for hit in source_drs:
                neighbors = self.md_search(hit, md_relation)
                found = self._network.md_neighbors_id(hit, neighbors, relation)
                result = result.absorb(found)
            return result

        for hit in source_drs:
            found = self._network.neighbors_id(hit, relation)
            result = result.absorb(found)
        return result
Exemplo n.º 6
0
 def schema_neighbors_of(self, i_drs: DRS) -> DRS:
     """
     For every element of the input, collect the hits the network returns
     for that element's source table.

     An input in TABLE mode is switched to fields mode and absorbs the
     result of ``drs_from_table_hit`` for each of its elements first.

     :param i_drs: the input DRS (its mode is changed when it is in TABLE mode)
     :return: DRS absorbing, per input element, a DRS built with OP.TABLE
         from the hits of that element's ``source_name``
     """
     result = DRS([], Operation(OP.NONE))
     result = result.absorb_provenance(i_drs)
     if i_drs.mode == DRSMode.TABLE:
         i_drs.set_fields_mode()
         for table_hit in i_drs:
             table_fields = self.drs_from_table_hit(table_hit)
             i_drs = i_drs.absorb(table_fields)
     for hit in i_drs:
         same_table_hits = self.__network.get_hits_from_table(hit.source_name)
         table_drs = DRS(
             [x for x in same_table_hits],
             Operation(OP.TABLE, params=[hit]))
         result = result.absorb(table_drs)
     return result
Exemplo n.º 7
0
 def similar_content_to(self, i_drs: DRS) -> DRS:
     """
     Return the CONTENT_SIM neighbors of every element of the input DRS.

     An input in TABLE mode is switched to fields mode and absorbs the
     result of ``drs_from_table_hit`` for each of its elements first.

     :param i_drs: the input DRS (its mode is changed when it is in TABLE mode)
     :return: DRS absorbing the Relation.CONTENT_SIM neighbors of each input
         element, carrying the provenance of the input
     """
     result = DRS([], Operation(OP.NONE))
     result = result.absorb_provenance(i_drs)
     if i_drs.mode == DRSMode.TABLE:
         i_drs.set_fields_mode()
         for table_hit in i_drs:
             table_fields = self.drs_from_table_hit(table_hit)
             i_drs = i_drs.absorb(table_fields)
     for hit in i_drs:
         similar = self.__network.neighbors_id(hit, Relation.CONTENT_SIM)
         result = result.absorb(similar)
     return result
Exemplo n.º 8
0
 def pkfk_of(self, i_drs: DRS) -> DRS:
     """
     Return the PKFK neighbors of every element of the input DRS.

     An input in TABLE mode is switched to fields mode and absorbs the
     result of ``drs_from_table_hit`` for each of its elements first.

     :param i_drs: the input DRS (its mode is changed when it is in TABLE mode)
     :return: DRS absorbing the Relation.PKFK neighbors of each input
         element, carrying the provenance of the input
     """
     result = DRS([], Operation(OP.NONE))
     result = result.absorb_provenance(i_drs)
     if i_drs.mode == DRSMode.TABLE:
         i_drs.set_fields_mode()
         for table_hit in i_drs:
             table_fields = self.drs_from_table_hit(table_hit)
             i_drs = i_drs.absorb(table_fields)
     for hit in i_drs:
         linked = self.__network.neighbors_id(hit, Relation.PKFK)
         result = result.absorb(linked)
     return result
Exemplo n.º 9
0
    def test_absorb_provenance(self):
        """
        After drs1 absorbs the provenance of drs2, every hit that was in
        drs1 must still be present in the resulting DRS.
        """
        print(self._testMethodName)

        # DRS 1: four hits obtained from a CONTENT_SIM operation on origin_1
        origin_1 = Hit(10, "dba", "table_c", "v", -1)
        hits_1 = [
            Hit(0, "dba", "table_a", "a", -1),
            Hit(1, "dba", "table_a", "b", -1),
            Hit(2, "dba", "table_b", "c", -1),
            Hit(3, "dba", "table_b", "d", -1),
        ]
        drs1 = DRS(hits_1, Operation(OP.CONTENT_SIM, params=[origin_1]))

        # DRS 2: two hits obtained from a SCHEMA_SIM operation on origin_2
        origin_2 = Hit(1, "dba", "table_a", "b", -1)
        hits_2 = [
            Hit(16, "dba", "table_d", "a", -1),
            Hit(17, "dba", "table_d", "b", -1),
        ]
        drs2 = DRS(hits_2, Operation(OP.SCHEMA_SIM, params=[origin_2]))

        drs = drs1.absorb_provenance(drs2)

        # dump the merged provenance graph for manual inspection
        prov_graph = drs.get_provenance().prov_graph()
        nodes = prov_graph.nodes()
        print("NODES")
        for node in nodes:
            print(str(node))
        print(" ")
        edges = prov_graph.edges(keys=True)
        print("EDGES")
        for edge in edges:
            print(str(edge))
        print(" ")

        # hits of drs1 that are missing from the merged DRS
        before = {hit for hit in drs1}
        after = {hit for hit in drs}
        missing = before - after

        print("Len must be 0: " + str(len(missing)))

        self.assertTrue(len(missing) == 0)
Exemplo n.º 10
0
 def paths(self, a: DRS, primitives) -> DRS:
     """
     Look for transitive relationships between pairs of elements of ``a``.

     Every ordered pair of elements is handed to the network path finder
     that matches the mode of the input, constrained on the given primitive
     (singular for now), and the results are absorbed into one output DRS.

     :param a: the input DRS, in FIELDS or TABLE mode
     :param primitives: primitive the paths must follow
     :return: DRS absorbing the results of the path searches, carrying the
         provenance of the input; no search is run for any other mode
     """
     result = DRS([], Operation(OP.NONE))
     result = result.absorb_provenance(a)
     if a.mode == DRSMode.FIELDS:
         for src in a:
             for dst in a:
                 # a field is never paired with itself
                 if not (src == dst):
                     found = self.__network.find_path_hit(src, dst, primitives)
                     result = result.absorb(found)
     elif a.mode == DRSMode.TABLE:
         # NOTE(review): unlike FIELDS mode, equal pairs are not skipped
         # here -- confirm whether that asymmetry is intended
         for src in a:
             for dst in a:
                 found = self.__network.find_path_table(
                     src, dst, primitives, self)
                 result = result.absorb(found)
     return result