Пример #1
0
    def _right_matrix_multiplication(self, n_matrix, other):
        """Multiply ``other`` with the normalized matrix, block by block.

        Parameters
        ----------
        n_matrix
            Object exposing ``ent_table``, ``kfkds`` and ``att_table``.
        other
            Left operand; it is converted to a C-ordered float array first.

        Returns
        -------
        The horizontal stack of the per-table products: the entity-table
        block (omitted when the entity table has zero columns) followed by
        one block per attribute table.
        """
        # ``np.float`` was only an alias of the builtin ``float``; it was
        # deprecated in NumPy 1.20 and removed in 1.24.
        other = other.astype(float, order='C')
        s = n_matrix.ent_table
        k = n_matrix.kfkds
        r = n_matrix.att_table

        ns = k[0].shape[0]
        nr = [t.shape[0] for t in r]
        nk = len(k)
        nw = other.shape[0]
        res = [np.zeros((nw, t.shape[0]), dtype=float) for t in r]

        # Fills ``res`` in place.
        # NOTE(review): presumably sums the columns of ``other`` per
        # foreign-key value -- confirm against the comp extension.
        comp.group(ns, nk, nw, k, nr, other, res)

        blocks = []
        if s.shape[1] != 0:
            blocks.append(other * s if sp.issparse(s) else other.dot(s))
        blocks.extend(res[i] * t if sp.issparse(t) else res[i].dot(t)
                      for i, t in enumerate(r))
        return np.hstack(blocks)
Пример #2
0
    def _cross_prod(self):
        """Compute the cross-product of this normalized matrix.

        The result is assembled block-wise from the entity table
        (``ent_table``), the attribute tables (``att_table``) and the key
        arrays (``kfkds``).

        Returns
        -------
        res
            * ``self.trans`` false: the ``_t_cross`` of the entity table (or
              an ``(ns, ns)`` zero array when it is empty) updated in place
              by ``comp.expand_add`` with the attribute-table parts.
            * ``self.trans`` true, all attribute tables sparse: a scipy
              sparse matrix stacked from the individual blocks.
            * ``self.trans`` true otherwise: a dense ``(nt, nt)`` array,
              ``nt`` being the total column count of all tables.
        """
        s = self.ent_table
        r = self.att_table
        k = self.kfkds
        ns = k[0].shape[0]
        ds = s.shape[1]
        nr = [t.shape[0] for t in r]
        dr = [t.shape[1] for t in r]

        if not self.trans:
            if s.size > 0:
                res = self._t_cross(s)
            else:
                res = np.zeros((ns, ns), dtype=float, order='C')
            if all(map(sp.issparse, r)):
                cross_r = [self._t_cross(t).toarray() for t in r]
            else:
                cross_r = [self._t_cross(t) for t in r]
            comp.expand_add(ns, len(k), k, cross_r, nr, res)

            return res

        if all(map(sp.issparse, r)):
            other = np.ones((1, ns))
            v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]
            comp.group(ns, len(k), 1, k, nr, other, v)
            size = r[0].size
            data = np.empty(size)

            # part 2 and 3 are p.T and p
            comp.multiply_sparse(size, r[0].row, r[0].data, np.sqrt(v[0]),
                                 data)
            diag_part = self._cross(
                sp.coo_matrix((data, (r[0].row, r[0].col))))
            if ds > 0:
                m = np.zeros((nr[0], ds))
                comp.group_left(ns, ds, s, k[0], m)
                p = self._cross(r[0], m)
                s_part = self._cross(self.ent_table)

                res = sp.vstack((np.hstack((s_part, p.T)),
                                 sp.hstack((p, diag_part))))
            else:
                res = diag_part

            # multi-table join: grow the result by one block row/column per
            # additional attribute table
            for i in range(1, len(k)):
                ps = []
                if ds > 0:
                    m = np.zeros((nr[i], ds))
                    comp.group_left(ns, ds, s, k[i], m)
                    ps += [self._cross(r[i], m)]

                # cp (KRi)
                size = r[i].size
                data = np.empty(size)
                comp.multiply_sparse(size, r[i].row, r[i].data,
                                     np.sqrt(v[i]), data)
                diag_part = self._cross(
                    sp.coo_matrix((data, (r[i].row, r[i].col))))

                for j in range(i):
                    ps += [r[i].tocsr()[k[i]].T.dot(r[j].tocsr()[k[j]])]

                res = sp.vstack((
                    sp.hstack((res, sp.vstack([blk.T for blk in ps]))),
                    sp.hstack(ps + [diag_part])))
            return res

        nt = ds + sum(dr)
        other = np.ones((1, ns))
        v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]
        res = np.empty((nt, nt))

        data = np.empty(r[0].shape, order='C')
        comp.group(ns, len(k), 1, k, nr, other, v)
        comp.multiply(r[0].shape[0], r[0].shape[1], r[0], v[0], data)
        res[ds:ds + dr[0], ds:ds + dr[0]] = self._cross(data)

        if ds > 0:
            m = np.zeros((nr[0], ds))
            comp.group_left(ns, ds, s, k[0], m)
            res[ds:ds + dr[0], :ds] = self._cross(r[0], m)
            res[:ds, ds:ds + dr[0]] = res[ds:ds + dr[0], :ds].T
            res[:ds, :ds] = self._cross(self.ent_table)

        # multi-table join
        for i in range(1, len(k)):
            # Row/column range of attribute table ``i`` inside ``res``.
            # Computed unconditionally: it is needed below even when the
            # entity table has no columns (ds == 0), where it used to be
            # left undefined.
            ni1 = ds + sum(dr[:i])
            ni2 = ni1 + dr[i]

            if ds > 0:
                m = np.zeros((nr[i], ds))
                comp.group_left(ns, ds, s, k[i], m)
                res[ni1:ni2, :ds] = self._cross(r[i], m)
                res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

            # cp(KRi)
            data = np.empty(r[i].shape, order='C')
            comp.multiply(r[i].shape[0], r[i].shape[1], r[i], v[i], data)
            res[ni1:ni2, ni1:ni2] = self._cross(data)

            for j in range(i):
                dj1 = ds + sum(dr[:j])
                dj2 = dj1 + dr[j]

                # Choose between going through a grouped (nr[i], nr[j])
                # matrix and expanding both tables by their keys.
                if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                    m = np.zeros((nr[i], nr[j]), order='C')
                    comp.group_k_by_k(nr[i], nr[j], ns, k[i], k[j], m)

                    res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
                else:
                    res[ni1:ni2, dj1:dj2] = r[i][k[i]].T.dot(r[j][k[j]])
                # mirror the off-diagonal block
                res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T
        return res
Пример #3
0
    def sum(self, axis=None, dtype=None, out=None):
        """
        Sum the elements of the normalized matrix over the given axis.

        Parameters
        ----------
        axis: None or int, optional
            the axis used to perform sum aggregation. ``0`` sums each
            column, ``1`` sums each row, anything else sums every element.
        dtype, out: optional
            accepted but not used by this implementation.

        Examples
        --------
        T = Entity Table:
                [[ 1.  2.]
                 [ 4.  3.]
                 [ 5.  6.]
                 [ 8.  7.]
                 [ 9.  1.]]
            Attribute Table:
                [[ 1.1  2.2]
                 [ 3.3  4.4]]
            K:
                [[1, 0, 0, 1, 0]]
        >>> T.sum(axis=0)
            [[ 27.   19.   12.1  17.6]]
        >>> T.sum(axis=1)
            [[  6.3]
             [ 14.7]
             [ 18.7]
             [ 18.3]
             [ 17.7]]
        >>> T.sum()
            75.7
        """
        k = self.kfkds
        ns = k[0].shape[0]
        nr = [t.shape[0] for t in self.att_table]
        if axis == 0:
            # col sum
            if self.trans:
                return (self.ent_table.sum(axis=1) + sum(
                    (t.sum(axis=1)[self.kfkds[i]]
                     for i, t in enumerate(self.att_table)))).T
            else:
                other = np.ones((1, ns))

                res = [
                    np.zeros((1, t.shape[0]), dtype=float)
                    for t in self.att_table
                ]
                comp.group(ns, len(k), 1, k, nr, other, res)

                return np.hstack(
                    [self.ent_table.sum(axis=0)] +
                    [res[i] * t for i, t in enumerate(self.att_table)])
        elif axis == 1:
            # row sum
            if self.trans:
                other = np.ones((1, ns))

                res = [
                    np.zeros((1, t.shape[0]), dtype=float)
                    for t in self.att_table
                ]
                comp.group(ns, len(k), 1, k, nr, other, res)

                # Each attribute table is paired with its own grouped
                # vector ``res[i]`` (mirrors the non-transposed column sum
                # above); multiplying by the whole ``res`` list was a bug.
                return np.hstack(
                    [self.ent_table.sum(axis=0)] +
                    [res[i] * t for i, t in enumerate(self.att_table)]).T
            else:
                return self.ent_table.sum(axis=1) + \
                       sum((t.sum(axis=1)[self.kfkds[i]] for i, t in enumerate(self.att_table)))

        # sum of the whole matrix
        # res is k * r
        other = np.ones((1, ns))
        res = [np.zeros((1, t.shape[0]), dtype=float) for t in self.att_table]
        comp.group(ns, len(k), 1, k, nr, other, res)
        return self.ent_table.sum() + \
               sum((res[i] * t.sum(axis=1) for i, t in enumerate(self.att_table)))._collapse(None)
Пример #4
0
    def _cross_prod(self):
        """Compute the cross-product of this normalized matrix.

        The result is assembled block-wise from the entity table
        (``ent_table``), the attribute tables (``att_table``) and the key
        arrays (``kfkds``); the second-order path additionally reads
        ``shadow_ent_table`` / ``shadow_att_table``.

        Returns
        -------
        res
            * ``self.trans`` false: ``NotImplemented`` when every attribute
              table is sparse, otherwise the ``_t_cross`` of the entity
              table (or an ``(ns, ns)`` zero array when it is empty) updated
              in place by ``comp.expand_add``.
            * ``self.trans`` true: the assembled cross-product scaled by
              ``self.a ** 2``; ``NotImplemented`` when ``self.b`` is
              non-zero.
        """
        s = self.ent_table
        r = self.att_table
        k = self.kfkds
        ns = k[0].shape[0]
        ds = s.shape[1]
        nr = [t.shape[0] for t in r]
        dr = [t.shape[1] for t in r]

        if not self.trans:
            if all(map(sp.issparse, r)):
                return NotImplemented

            if s.size > 0:
                res = self._t_cross(s)
            else:
                res = np.zeros((ns, ns), dtype=float, order='C')

            # At least one attribute table is dense here, so the former
            # "all sparse -> toarray()" variant could never run.
            cross_r = [self._t_cross(t) for t in r]
            comp.expand_add(ns, len(k), k, cross_r, nr, res)

            return res

        else:
            if all(map(sp.issparse, r)):
                other = np.ones((1, ns))
                v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]
                comp.group(ns, len(k), 1, k, nr, other, v)
                size = r[0].size
                data = np.empty(size)

                # part 2 and 3 are p.T and p
                comp.multiply_sparse(size, r[0].row, r[0].data, np.sqrt(v[0]), data)
                diag_part = self._cross(sp.coo_matrix((data, (r[0].row, r[0].col))))
                if ds > 0:
                    m = np.zeros((nr[0], ds))
                    comp.group_left(ns, ds, s, k[0], m)
                    p = self._cross(r[0], m)
                    s_part = self._cross(self.ent_table)

                    res = sp.vstack((np.hstack((s_part, p.T)), sp.hstack((p, diag_part))))
                else:
                    res = diag_part

                # multi-table join
                for i in range(1, len(k)):
                    ps = []
                    if ds > 0:
                        m = np.zeros((nr[i], ds))
                        comp.group_left(ns, ds, s, k[i], m)
                        ps += [self._cross(r[i], m)]

                    # cp (KRi)
                    size = r[i].size
                    data = np.empty(size)
                    comp.multiply_sparse(size, r[i].row, r[i].data, np.sqrt(v[i]), data)
                    diag_part = self._cross(sp.coo_matrix((data, (r[i].row, r[i].col))))

                    for j in range(i):
                        ps += [r[i].tocsr()[k[i]].T.dot(r[j].tocsr()[k[j]])]

                    res = sp.vstack((sp.hstack((res, sp.vstack([blk.T for blk in ps]))),
                                     sp.hstack(ps + [diag_part])))
            else:
                s = np.ascontiguousarray(s)
                if self.second_order:
                    nt = self.shape[0]
                    other = np.ones((1, ns))
                    v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]

                    res = np.empty((nt, nt))
                    comp.group(ns, len(k), 1, k, nr, other, v)
                    data = sp.diags(np.sqrt(v[0]), [0]) * r[0]
                    res[ds:ds+dr[0], ds:ds+dr[0]] = self._cross(data)

                    # NOTE(review): the remaining blocks are only written
                    # when ds > 0, and only the first attribute table is
                    # handled -- confirm this path is never used otherwise.
                    if ds > 0:
                        # p1
                        m1 = np.zeros((nr[0], ds), dtype=float)
                        comp.group_left(ns, ds, s, k[0], m1)
                        res[ds:ds + dr[0], :ds] = r[0].T.dot(m1)
                        res[:ds, ds:ds + dr[0]] = res[ds:ds + dr[0], :ds].T
                        # s
                        res[:ds, :ds] = self._cross(self.ent_table)

                        r_p = base.data_interaction_rr(self.shadow_ent_table, self.shadow_att_table[0][k[0]])
                        # p2
                        res[:ds, ds+dr[0]:] = s.T.dot(r_p)
                        res[ds+dr[0]:, :ds] = res[:ds, ds+dr[0]:].T
                        # p3
                        m = np.zeros((nr[0], self.shadow_ent_table.shape[1]), dtype=float)
                        comp.group_left(ns, self.shadow_ent_table.shape[1], self.shadow_ent_table, k[0], m)
                        ksr = base.data_interaction_rr(m, self.shadow_att_table[0])
                        res[ds:ds+dr[0], ds+dr[0]:] = self._cross(r[0], ksr)
                        res[ds+dr[0]:, ds:ds+dr[0]] = res[ds:ds+dr[0], ds+dr[0]:].T
                        rrx = base.data_interaction_rr(self.shadow_att_table[0], self.shadow_att_table[0])
                        ssx = base.data_interaction_rr(self.shadow_ent_table, self.shadow_ent_table)
                        dss = self.shadow_ent_table.shape[1] * self.shadow_ent_table.shape[1]
                        kss = np.zeros((nr[0], dss), dtype=float)
                        comp.group_left(ns, dss, ssx, k[0], kss)
                        res[ds + dr[0]:, ds + dr[0]:] = rrx.T.dot(kss).reshape((r_p.shape[1], r_p.shape[1]))
                else:
                    # Size the result for *all* attribute tables; counting
                    # only the first one left no room for the blocks the
                    # multi-table loop below writes.
                    nt = ds + sum(dr)
                    other = np.ones((1, ns))
                    v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]
                    comp.group(ns, len(k), 1, k, nr, other, v)

                    res = np.empty((nt, nt))
                    # End of the first attribute table's row/column range.
                    e0 = ds + dr[0]
                    data = sp.diags(np.sqrt(v[0]), [0]) * r[0]
                    res[ds:e0, ds:e0] = self._cross(data)
                    if ds > 0:
                        m = np.zeros((nr[0], ds))
                        comp.group_left(ns, ds, s, k[0], m)
                        res[ds:e0, :ds] = self._cross(r[0], m)
                        res[:ds, ds:e0] = res[ds:e0, :ds].T
                        res[:ds, :ds] = self._cross(self.ent_table)

                    # multi-table join
                    for i in range(1, len(k)):
                        # Row/column range of attribute table ``i``; needed
                        # below even when ds == 0, where it used to be left
                        # undefined.
                        ni1 = ds + sum(dr[:i])
                        ni2 = ni1 + dr[i]

                        if ds > 0:
                            m = np.zeros((nr[i], ds))
                            comp.group_left(ns, ds, s, k[i], m)
                            res[ni1:ni2, :ds] = self._cross(r[i], m)
                            res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

                        # cp(KRi)
                        data = np.empty(r[i].shape, order='C')
                        comp.multiply(r[i].shape[0], r[i].shape[1], r[i], v[i],
                                      data)
                        res[ni1:ni2, ni1:ni2] = self._cross(data)

                        for j in range(i):
                            dj1 = ds + sum(dr[:j])
                            dj2 = dj1 + dr[j]

                            if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                                m = np.zeros((nr[i], nr[j]), order='C')
                                comp.group_k_by_k(nr[i], nr[j], ns, k[i], k[j], m)

                                res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
                            else:
                                res[ni1:ni2, dj1:dj2] = r[i][k[i]].T.dot(r[j][k[j]])
                            # mirror the off-diagonal block
                            res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T

            if self.a != 1.0:
                res = res * np.power(self.a, 2)
            if self.b != 0.0:
                return NotImplemented

            return res
Пример #5
0
    def _right_matrix_multiplication(self, n_matrix, other, permute=None):
        """Multiply ``other`` with the normalized matrix, block by block.

        Parameters
        ----------
        n_matrix
            Object exposing ``ent_table``, ``kfkds`` and ``att_table``.
        other
            Left operand; it is converted to a C-ordered float array first.
        permute : optional
            Index applied to the summed entity x attribute interaction
            vector before it is reshaped (dense second-order path only).

        Returns
        -------
        total
            Horizontal stack of the entity block (omitted when the entity
            table has zero columns), the attribute blocks (each preceded by
            its grouped vector when ``self.identity`` is set) and the
            second-order interaction blocks, scaled by ``self.a`` and
            shifted by ``self.b * other.sum(axis=1)``.
        """
        # ``np.float`` was only an alias of the builtin ``float``; it was
        # deprecated in NumPy 1.20 and removed in 1.24.
        other = other.astype(float, order='C')
        s = n_matrix.ent_table
        k = n_matrix.kfkds
        r = n_matrix.att_table

        ns = k[0].shape[0]
        nr = [t.shape[0] for t in r]
        nk = len(k)
        nw = other.shape[0]
        res = [np.zeros((nw, t.shape[0]), dtype=float) for t in r]

        comp.group(ns, nk, nw, k, nr, other, res)
        rr = []
        if self.second_order:
            if self.shadow_ent_table is not None:
                for i in range(len(r)):
                    # sr
                    ss = self.shadow_ent_table
                    r1 = self.shadow_att_table[i]
                    k1 = self.kfkds[i]

                    # TODO: 1. avoid materializing a map 2. use 2d array in transformation
                    if not sp.issparse(s) and not sp.issparse(r1):
                        u = np.zeros((r1.shape[0], ss.shape[1] * nw), dtype=float, order='C')
                        comp.group_left(ss.shape[0], ss.shape[1] * nw,
                                        np.ascontiguousarray(base.data_interaction_rr(ss, np.matrix(other).T)), k1, u)
                        if permute is None:
                            rr += [np.matrix(base.data_interaction_rr(r1, u).sum(axis=0).reshape(nw, -1))]
                        else:
                            rr += [np.matrix(base.data_interaction_rr(r1, u).sum(axis=0)[permute].reshape(nw, -1))]
                    else:
                        # NOTE(review): ``u`` is computed here but never
                        # appended to ``rr`` (the attribute x attribute
                        # sparse path below does append it) and ``permute``
                        # is ignored -- confirm whether that is intended.
                        u = np.zeros((nw, r1.shape[1] * ss.shape[1]), dtype=float, order='C')
                        comp.data_interaction_group_sparse(ns, len(ss.data), ss.shape[0], ss.shape[1],
                                                           len(r1.data), r1.shape[0], r1.shape[1], nw,
                                                           ss.row, ss.col, ss.data, r1.row, r1.col, r1.data,
                                                           k1, k1, np.ascontiguousarray(other), u)

            # rr: one interaction block per pair of attribute tables
            for i in range(len(r)):
                for j in range(i+1, len(r)):
                    # The pair is deliberately taken in swapped order
                    # (table j first), as in the original code.
                    r1, r2 = self.shadow_att_table[j], self.shadow_att_table[i]
                    k1, k2 = self.kfkds[j], self.kfkds[i]

                    if not sp.issparse(r1) and not sp.issparse(r2):
                        u = np.zeros((r2.shape[0], r1.shape[1] * nw), dtype=float, order='C')
                        comp.data_interaction_group(ns, r1.shape[1], nw, k1, k2, r1, np.ascontiguousarray(other), u)
                        rr += [np.matrix(base.data_interaction_rr(r2, u).sum(axis=0).reshape(nw, -1))]
                    else:
                        u = np.zeros((nw, r1.shape[1] * r2.shape[1]), dtype=float, order='C')
                        r1 = r1.tocsr()[k1].tocoo()
                        comp.data_interaction_group_sparse(ns, len(r1.data), r1.shape[0], r1.shape[1],
                                                           len(r2.data), r2.shape[0], r2.shape[1], nw,
                                                           r1.row, r1.col, r1.data, r2.row, r2.col, r2.data,
                                                           k1, k2, np.ascontiguousarray(other), u)
                        rr += [u]

        # Entity block first (skipped when the entity table has no columns).
        parts = [other * s if sp.issparse(s) else other.dot(s)] if s.shape[1] != 0 else []
        for i, t in enumerate(r):
            if self.identity:
                # identity mode also emits the grouped vector itself
                parts.append(np.matrix(res[i]))
            parts.append(res[i] * t if sp.issparse(t) else res[i].dot(t))
        parts += rr
        total = np.hstack(parts)

        if self.a != 1.0:
            total = total * self.a
        if self.b != 0.0:
            total = total + self.b * other.sum(axis=1)

        return total
Пример #6
0
    def sum(self, axis=None, dtype=None, out=None):
        """
        Sum the elements of the normalized matrix over the given axis.

        Parameters
        ----------
        axis: None or int, optional
            the axis used to perform sum aggregation. ``0`` sums each
            column, ``1`` sums each row, anything else sums every element.
        dtype, out: optional
            accepted but not used by this implementation.

        Examples
        --------
        T = Entity Table:
                [[ 1.  2.]
                 [ 4.  3.]
                 [ 5.  6.]
                 [ 8.  7.]
                 [ 9.  1.]]
            Attribute Table:
                [[ 1.1  2.2]
                 [ 3.3  4.4]]
            K:
                [[1, 0, 0, 1, 0]]
        >>> T.sum(axis=0)
            [[ 27.   19.   12.1  17.6]]
        >>> T.sum(axis=1)
            [[  6.3]
             [ 14.7]
             [ 18.7]
             [ 18.3]
             [ 17.7]]
        >>> T.sum()
            75.7
        """
        k = self.kfkds
        ns = k[0].shape[0]
        nr = [t.shape[0] for t in self.att_table]
        if axis == 0:
            # col sum
            if self.trans:
                rr = []
                if self.second_order:
                    r_sum = [t.sum(axis=1)[self.kfkds[i]] for i, t in enumerate(self.shadow_att_table)]
                    for i in range(len(r_sum)):
                        for j in range(i + 1, len(r_sum)):
                            rr += [np.multiply(r_sum[i], r_sum[j])]

                res = self.ent_table.sum(axis=1).reshape((ns, -1)) + \
                      sum((t.sum(axis=1)[self.kfkds[i]] for i, t in enumerate(self.att_table))).reshape((ns, -1)) \
                      + sum(rr)

                if self.identity:
                    # ``res`` holds one sum per entity row, so each entry
                    # gets one unit per attribute table -- the same amount
                    # the non-transposed row sum below adds.  The former
                    # ``ns * len(...)`` is the grand-total contribution and
                    # only belongs in the whole-matrix sum.
                    res += len(self.att_table)
                if self.a != 1.0:
                    res *= self.a
                if self.b != 0.0:
                    res += self.shape[1] * self.b

                return res.T
            else:
                other = np.ones((1, ns))

                return self._right_matrix_multiplication(self, other)
        elif axis == 1:
            # row sum
            if self.trans:
                other = np.ones((1, ns))

                return self._right_matrix_multiplication(self, other).T
            else:
                rr = self._row_sum_rr()
                res = sum((t.sum(axis=1)[self.kfkds[i]] for i, t in enumerate(self.att_table))).reshape((ns, -1)) \
                       + sum(rr)

                if self.ent_table is not None and self.ent_table.size > 0:
                    res += self.ent_table.sum(axis=1).reshape((ns, -1))

                if self.identity:
                    res += 1 * len(self.att_table)
                if self.a != 1.0:
                    res *= self.a
                if self.b != 0.0:
                    res += self.shape[1] * self.b

                return res

        # sum of the whole matrix
        # res is k * r
        other = np.ones((1, ns))
        g = [np.zeros((1, t.shape[0]), dtype=np.float64) for t in self.att_table]
        comp.group(ns, len(k), 1, k, nr, other, g)
        res = self.ent_table.sum() + \
            sum((g[i] * np.matrix(t.sum(axis=1).reshape(-1, 1)) for i, t in enumerate(self.att_table)))._collapse(None)

        if self.second_order:
            res += sum(self._row_sum_rr()).sum()

        if self.identity:
            res += ns * len(self.att_table)

        if self.a != 1.0:
            res = res * self.a
        if self.b != 0.0:
            res = res + self.shape[1] * self.shape[0] * self.b

        return res
Пример #7
0
    def _cross_prod_w(self, w):
        """Calculate X * A * X.T, where A is a diagonal matrix.

        Parameters
        ----------
        w : ndarray
            The diagonal of A; it is converted to float before use.

        Returns
        -------
        res
            * ``self.trans`` false: the ``_t_cross_w`` of the entity table
              (or an ``(ns, ns)`` zero array when it is empty) updated in
              place by ``comp.expand_add`` with the attribute-table parts.
            * ``self.trans`` true, all attribute tables sparse: a scipy
              sparse matrix stacked from the individual blocks.
            * ``self.trans`` true otherwise: a dense ``(nt, nt)`` array,
              ``nt`` being the total column count of all tables.
        """
        w = w.astype(float)

        s = self.ent_table
        r = self.att_table
        k = self.kfkds

        ns = k[0].shape[0]
        ds = s.shape[1]
        nr = [t.shape[0] for t in r]
        dr = [t.shape[1] for t in r]

        # Loop-invariant: evaluated once instead of once per table.
        all_sparse = all(map(sp.issparse, r))

        if not self.trans:
            if s.size > 0:
                res = self._t_cross_w(s, w[0:ds])
            else:
                res = np.zeros((ns, ns), dtype=float, order='C')

            # ``count`` walks through ``w`` one table-width at a time.
            count = ds
            cross_r = []
            for t in r:
                part = self._t_cross_w(t, w[count:count + t.shape[1]])
                cross_r.append(part.toarray() if all_sparse else part)
                count += t.shape[1]

            comp.expand_add(ns, len(k), k, cross_r, nr, res)
        else:

            if all_sparse:
                # change the 'other' as weight to group
                other = w.reshape((1, -1)).astype(float)
                s2 = w.reshape(-1, 1) * np.array(s)
                v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]
                comp.group(ns, len(k), 1, k, nr, other, v)
                size = r[0].size
                data = np.empty(size)

                # part 2 and 3 are p.T and p
                comp.multiply_sparse(size, r[0].row, r[0].data, np.sqrt(v[0]),
                                     data)
                diag_part = self._cross(
                    sp.coo_matrix((data, (r[0].row, r[0].col))))
                if ds > 0:
                    m = np.zeros((nr[0], ds))
                    comp.group_left(ns, ds, s2, k[0], m)
                    p = self._cross(r[0], m)
                    s_part = self._cross(s, s2)

                    res = sp.vstack((np.hstack(
                        (s_part, p.T)), sp.hstack((p, diag_part))))
                else:
                    res = diag_part

                # multi-table join
                for i in range(1, len(k)):
                    ps = []
                    if ds > 0:
                        m = np.zeros((nr[i], ds))
                        comp.group_left(ns, ds, s2, k[i], m)
                        ps += [self._cross(r[i], m)]

                    # cp (KRi)
                    size = r[i].size
                    data = np.empty(size)
                    comp.multiply_sparse(size, r[i].row, r[i].data,
                                         np.sqrt(v[i]), data)
                    diag_part = self._cross(
                        sp.coo_matrix((data, (r[i].row, r[i].col))))

                    for j in range(i):
                        ps += [
                            r[i].tocsr()[k[i]].T.dot(
                                r[j].tocsr()[k[j]].multiply(w.reshape(-1, 1)))
                        ]

                    res = sp.vstack((sp.hstack(
                        (res, sp.vstack([blk.T for blk in ps]))),
                                     sp.hstack(ps + [diag_part])))

            else:
                nt = ds + sum(dr)
                other = w.reshape((1, -1)).astype(float)
                s2 = w.reshape(-1, 1) * np.array(s)
                v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]
                res = np.empty((nt, nt))

                data = np.empty(r[0].shape, order='C')
                comp.group(ns, len(k), 1, k, nr, other, v)
                comp.multiply(r[0].shape[0], r[0].shape[1], r[0], v[0], data)
                res[ds:ds + dr[0], ds:ds + dr[0]] = self._cross(data)

                if ds > 0:
                    m = np.zeros((nr[0], ds))
                    comp.group_left(ns, ds, s2, k[0], m)
                    res[ds:ds + dr[0], :ds] = self._cross(r[0], m)
                    res[:ds, ds:ds + dr[0]] = res[ds:ds + dr[0], :ds].T
                    res[:ds, :ds] = self._cross(s, s2)

                # multi-table join
                for i in range(1, len(k)):
                    # Row/column range of attribute table ``i``; needed
                    # below even when ds == 0, where it used to be left
                    # undefined.
                    ni1 = ds + sum(dr[:i])
                    ni2 = ni1 + dr[i]

                    if ds > 0:
                        m = np.zeros((nr[i], ds))
                        comp.group_left(ns, ds, s2, k[i], m)
                        res[ni1:ni2, :ds] = self._cross(r[i], m)
                        res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

                    # cp(KRi)
                    data = np.empty(r[i].shape, order='C')
                    comp.multiply(r[i].shape[0], r[i].shape[1], r[i], v[i],
                                  data)
                    res[ni1:ni2, ni1:ni2] = self._cross(data)

                    for j in range(i):
                        dj1 = ds + sum(dr[:j])
                        dj2 = dj1 + dr[j]
                        if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                            m = np.zeros((nr[i], nr[j]), order='C')
                            # Update in comp.cpp. When count the number in each group, add w instead of 1.
                            comp.group_k_by_k_w(nr[i], nr[j], ns, w, k[i],
                                                k[j], m)

                            res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
                        else:
                            res[ni1:ni2,
                                dj1:dj2] = (w.reshape(-1, 1) *
                                            np.array(r[i][k[i]])).T.dot(
                                                r[j][k[j]])
                        # mirror the off-diagonal block
                        res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T

        return res
Пример #8
0
    def _cross_prod_hess(self, w):
        """Calculate ``X * A * X.T`` where ``A`` is a diagonal matrix.

        ``X`` is the logical (joined) matrix represented by this object and
        ``w`` holds the diagonal of ``A``.  The computation is pushed down to
        the entity table and the attribute tables instead of materializing
        the join.

        Parameters
        ----------
        w : ndarray
            Diagonal of ``A``.  When ``self.trans`` is false it is sliced per
            feature column (``ds`` entries for the entity table followed by
            ``t.shape[1]`` entries for each attribute table).  When
            ``self.trans`` is true it is used as one weight per row of the
            join (it is broadcast against the rows of the entity table).

        Returns
        -------
        res : X * A * X.T
            ``self.trans`` false: the ``(ns, ns)`` result of ``_t_cross_w``
            (or a zero ndarray) after ``comp.expand_add``.
            ``self.trans`` true with all-sparse attribute tables: a scipy
            sparse matrix.  Otherwise a dense ``(nt, nt)`` ndarray, with
            ``nt`` the total number of feature columns.

        Examples
        --------
        T is transposed (``trans`` is true), built from:
            Entity Table:
                [[ 1.  2.]
                 [ 4.  3.]
                 [ 5.  6.]
                 [ 8.  7.]
                 [ 9.  1.]]
            Attribute Table:
                [[ 1.  2.]
                 [ 3.  4.]]
            K:
                [[0, 1, 1, 0, 1]]
        >>> T._cross_prod_hess(np.arange(5))
            [[ 582.,  276.,  174.,  248.],
             [ 276.,  232.,   78.,  118.],
             [ 174.,   78.,   66.,   90.],
             [ 248.,  118.,   90.,  124.]]

        """

        w = w.astype(float)
        s = self.ent_table
        r = self.att_table
        k = self.kfkds
        ns = k[0].shape[0]
        ds = s.shape[1]
        nr = [t.shape[0] for t in r]
        dr = [t.shape[1] for t in r]
        # Loop-invariant: decided once for all attribute tables.
        all_sparse = all(map(sp.issparse, r))

        if not self.trans:
            if s.size > 0:
                res = self._t_cross_w(s, w[0:ds])
            else:
                res = np.zeros((ns, ns), dtype=float, order='C')
            # `count` walks through w: each table consumes as many weights
            # as it has columns.
            count = ds
            cross_r = []
            for t in r:
                part = self._t_cross_w(t, w[count:count + t.shape[1]])
                # comp.expand_add receives dense blocks, so sparse results
                # are densified first.
                cross_r.append(part.toarray() if all_sparse else part)
                count += t.shape[1]
            comp.expand_add(ns, len(k), k, cross_r, nr, res)
        else:
            # Pieces shared by the sparse and the dense path.
            w_col = w.reshape(-1, 1)
            # change the 'other' as weight to group
            other = w.reshape((1, -1)).astype(float)
            s2 = w_col * np.array(s)
            v = [np.zeros((1, t.shape[0]), dtype=float) for t in r]
            # NOTE(review): presumably accumulates, per attribute-table row,
            # the weights of the join rows that reference it -- confirm
            # against comp.cpp.
            comp.group(ns, len(k), 1, k, nr, other, v)

            if all_sparse:
                size = r[0].size
                data = np.empty(size)

                comp.multiply_sparse(size, r[0].row, r[0].data, np.sqrt(v[0]),
                                     data)
                diag_part = self._cross(
                    sp.coo_matrix((data, (r[0].row, r[0].col))))
                if ds > 0:
                    m = np.zeros((nr[0], ds))
                    comp.group_left(ns, ds, s2, k[0], m)
                    p = self._cross(r[0], m)
                    s_part = self._cross(s, s2)

                    res = sp.vstack((np.hstack(
                        (s_part, p.T)), sp.hstack((p, diag_part))))
                else:
                    res = diag_part

                # multi-table join: grow `res` by one block row/column per
                # additional attribute table.
                for i in range(1, len(k)):
                    ps = []
                    if ds > 0:
                        m = np.zeros((nr[i], ds))
                        comp.group_left(ns, ds, s2, k[i], m)
                        ps += [self._cross(r[i], m)]

                    size = r[i].size
                    data = np.empty(size)
                    comp.multiply_sparse(size, r[i].row, r[i].data,
                                         np.sqrt(v[i]), data)
                    diag_part = self._cross(
                        sp.coo_matrix((data, (r[i].row, r[i].col))))

                    # Does not depend on j, so build it once per i.
                    ri_joined_t = r[i].tocsr()[k[i]].T
                    for j in range(i):
                        ps += [
                            ri_joined_t.dot(
                                r[j].tocsr()[k[j]].multiply(w_col))
                        ]

                    res = sp.vstack((sp.hstack(
                        (res, sp.vstack([p.T for p in ps]))),
                                     sp.hstack(ps + [diag_part])))

            else:
                # col_start[i]:col_start[i + 1] is the column range of
                # attribute table i inside the result; the entity table
                # occupies columns 0:ds.
                col_start = [ds]
                for width in dr:
                    col_start.append(col_start[-1] + width)
                nt = col_start[-1]
                res = np.empty((nt, nt))

                data = np.empty(r[0].shape, order='C')
                # NOTE(review): the sparse path passes sqrt(v) explicitly
                # while this call passes v -- presumably comp.multiply takes
                # the square root itself; confirm against comp.cpp.
                comp.multiply(r[0].shape[0], r[0].shape[1], r[0], v[0], data)
                res[ds:ds + dr[0], ds:ds + dr[0]] = self._cross(data)

                if ds > 0:
                    m = np.zeros((nr[0], ds))
                    comp.group_left(ns, ds, s2, k[0], m)
                    res[ds:ds + dr[0], :ds] = self._cross(r[0], m)
                    res[:ds, ds:ds + dr[0]] = res[ds:ds + dr[0], :ds].T
                    res[:ds, :ds] = self._cross(s, s2)

                # multi-table join
                for i in range(1, len(k)):
                    # Needed whether or not an entity table exists; they were
                    # previously assigned only under `ds > 0`, which raised
                    # UnboundLocalError for an empty entity table.
                    ni1, ni2 = col_start[i], col_start[i + 1]

                    if ds > 0:
                        m = np.zeros((nr[i], ds))
                        comp.group_left(ns, ds, s2, k[i], m)
                        res[ni1:ni2, :ds] = self._cross(r[i], m)
                        res[:ds, ni1:ni2] = res[ni1:ni2, :ds].T

                    data = np.empty(r[i].shape, order='C')
                    comp.multiply(r[i].shape[0], r[i].shape[1], r[i], v[i],
                                  data)
                    res[ni1:ni2, ni1:ni2] = self._cross(data)

                    for j in range(i):
                        dj1, dj2 = col_start[j], col_start[j + 1]
                        # Heuristic choosing between grouping the key pairs
                        # first and multiplying the joined rows directly.
                        if (ns * 1.0 / nr[j]) > (1 + nr[j] * 1.0 / dr[j]):
                            m = np.zeros((nr[i], nr[j]), order='C')
                            # Update in comp.cpp. When count the number in
                            # each group, add w instead of 1.
                            comp.group_k_by_k_w(nr[i], nr[j], ns, w, k[i],
                                                k[j], m)

                            # NOTE(review): m is allocated (nr[i], nr[j]) yet
                            # used as m.T against r[j]; verify the layout
                            # written by comp.group_k_by_k_w.
                            res[ni1:ni2, dj1:dj2] = r[i].T.dot(m.T.dot(r[j]))
                            res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T
                        else:
                            res[ni1:ni2,
                                dj1:dj2] = (w_col *
                                            np.array(r[i][k[i]])).T.dot(
                                                r[j][k[j]])
                            res[dj1:dj2, ni1:ni2] = res[ni1:ni2, dj1:dj2].T

        return res