示例#1
0
 def __init__(self, X, Y, C=None, dtype=None):
     """Wrap the inputs with ``PyMatrix.init_from`` and derive ``Z``.

     Args:
         X: input stored as ``self.pX``; its ``dtype`` attribute is the
             fallback element type.
         Y: input stored as ``self.pY``.
         C: optional input stored as ``self.pC``.
         dtype: element type handed to every ``PyMatrix.init_from``
             call; ``X.dtype`` is used when it is ``None``.
     """
     dtype = X.dtype if dtype is None else dtype
     self.pX = PyMatrix.init_from(X, dtype)
     self.pY = PyMatrix.init_from(Y, dtype)
     self.pC = PyMatrix.init_from(C, dtype)
     # Z = Y * C, only computed when a C was supplied.
     # NOTE(review): self.Y / self.C are not assigned in this method;
     # presumably they are accessors defined on the class -- confirm.
     if C is None:
         Z = None
     else:
         Z = smat.csr_matrix(self.Y.dot(self.C))
     self.pZ = PyMatrix.init_from(Z, dtype)
     self.dtype = dtype
示例#2
0
 def __init__(self, W, C=None, dtype=None):
     """Wrap ``W`` and the optional ``C`` with ``PyMatrix.init_from``.

     When ``C`` is given, its first dimension must equal ``W.shape[1]``;
     for a ``PyMatrix`` instance the shape is read from ``C.buf``.

     Args:
         W: input stored as ``self.pW``; its ``dtype`` attribute is the
             fallback element type.
         C: optional input (may already be a ``PyMatrix``) stored as
             ``self.pC``.
         dtype: element type handed to ``PyMatrix.init_from``;
             ``W.dtype`` is used when it is ``None``.
     """
     if C is not None:
         if not isinstance(C, PyMatrix):
             assert C.shape[0] == W.shape[1], 'C:{} W:{}'.format(
                 C.shape, W.shape)
         else:
             # A PyMatrix exposes its shape through the wrapped buffer.
             assert C.buf.shape[0] == W.shape[1]
     dtype = W.dtype if dtype is None else dtype
     self.pC = PyMatrix.init_from(C, dtype)
     self.pW = PyMatrix.init_from(W, dtype)
示例#3
0
 def __init__(self, X, Y, C=None, bias=-1.0, dtype=None):
     """Wrap the inputs with ``PyMatrix.init_from``, optionally adding a bias column.

     Args:
         X: input stored as ``self.pX``; its ``dtype`` attribute is the
             fallback element type.
         Y: input stored as ``self.pY``.
         C: optional input stored as ``self.pC``.
         bias: stored as ``self.bias``; when positive, ``X`` is passed
             through ``smat_util.append_column(X, bias)`` before being
             wrapped.
         dtype: element type handed to every ``PyMatrix.init_from``
             call; ``X.dtype`` is used when it is ``None``.
     """
     # The fallback dtype is read from the caller's X, before any bias
     # column is appended.
     dtype = X.dtype if dtype is None else dtype
     self.bias = bias
     if self.bias > 0:
         X = smat_util.append_column(X, self.bias)
     self.pX = PyMatrix.init_from(X, dtype)
     self.pY = PyMatrix.init_from(Y, dtype)
     self.pC = PyMatrix.init_from(C, dtype)
     # Z = Y * C, only computed when a C was supplied.
     # NOTE(review): self.Y / self.C are not assigned in this method;
     # presumably they are accessors defined on the class -- confirm.
     if C is None:
         Z = None
     else:
         Z = smat.csr_matrix(self.Y.dot(self.C))
     self.pZ = PyMatrix.init_from(Z, dtype)
     self.dtype = dtype
示例#4
0
    def predict_new(
        self,
        X,
        only_topk=None,
        csr_codes=None,
        cond_prob=None,
        normalized=False,
        threads=-1,
    ):
        """Score ``X`` with this model, optionally restricted by ``csr_codes``.

        Args:
            X: input whose second dimension must equal
                ``self.nr_features``.
            only_topk: forwarded to the ``smat_util`` sorting helpers.
            csr_codes: when ``None`` every column of ``self.W`` is scored
                via a dense product; otherwise a matrix of shape
                ``(X.shape[0], self.nr_codes)`` that is handed to
                ``clib.multilabel_predict_with_codes``.
            cond_prob: optional object providing ``transform(values,
                inplace=True)`` and ``combiner(values, other)``; skipped
                when falsy.
            normalized: when true, the result is passed through
                ``sk_normalize`` with ``norm="l1"`` along ``axis=1``.
            threads: forwarded to ``clib.multilabel_predict_with_codes``.

        Returns:
            The prediction matrix produced by the ``smat_util`` sorting
            helpers (optionally normalized).
        """
        assert X.shape[1] == self.nr_features
        if csr_codes is None:
            dense = X.dot(self.W).toarray()
            if cond_prob:
                dense = cond_prob.transform(dense, inplace=True)
            coo = smat_util.dense_to_coo(dense)
            pred_csr = smat_util.sorted_csr_from_coo(coo.shape,
                                                     coo.row,
                                                     coo.col,
                                                     coo.data,
                                                     only_topk=only_topk)
        else:  # csr_codes is given
            assert self.C is not None, "This model does not have C"
            assert csr_codes.shape[0] == X.shape[0]
            assert csr_codes.shape[1] == self.nr_codes
            if not csr_codes.has_sorted_indices:
                csr_codes = csr_codes.sorted_indices()

            offset = None
            if (csr_codes.data == 0).any():
                # This is a trick to avoid explicit removal of zero
                # entries by smat_dot_smat: shift every stored value by
                # max|value| + 1 so none is zero, and undo the shift on
                # the returned labels below.
                offset = sp.absolute(csr_codes.data).max() + 1
                csr_codes = smat.csr_matrix(
                    (csr_codes.data + offset, csr_codes.indices,
                     csr_codes.indptr),
                    shape=csr_codes.shape,
                )

            # csr_codes already has sorted indices at this point, so no
            # further sorted copy is needed before wrapping it.
            pZ = PyMatrix.init_from(csr_codes, self.dtype)
            csr_labels, pred_csr = clib.multilabel_predict_with_codes(
                X, self.pW, self.pC, pZ, threads=threads)
            if offset is not None:
                csr_labels.data -= offset

            if cond_prob:
                val = cond_prob.transform(pred_csr.data, inplace=True)
                # Write the combined values back into the matrix so the
                # result is kept even if the combiner returns a new
                # array instead of updating ``val`` in place.
                pred_csr.data[:] = cond_prob.combiner(val, csr_labels.data)

            pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)

        if normalized:
            pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm="l1")
        return pred_csr
示例#5
0
    def predict_new(self,
                    X,
                    only_topk=None,
                    csr_codes=None,
                    beam_size=2,
                    max_depth=None,
                    cond_prob=True,
                    normalized=False,
                    threads=-1):
        """Predict by running the models in ``self.model_chain`` in order.

        Each level's output is fed to the next level as ``csr_codes``.
        Every level but the last keeps ``beam_size`` entries; the last
        level uses ``only_topk``.

        Args:
            X: input whose second dimension must equal
                ``self.nr_features``; a bias column is appended when
                ``self.bias > 0``.
            only_topk: ``only_topk`` value used for the last level.
            csr_codes: initial codes handed to the first level.
            beam_size: ``only_topk`` value used for all earlier levels.
            max_depth: number of levels to run; defaults to, and is
                capped at, ``self.depth``.
            cond_prob: ``None``/``False`` selects
                ``PostProcessor(Transform.identity, Combiner.noop)``,
                ``True`` selects
                ``PostProcessor(Transform.get_lpsvm(3), Combiner.mul)``;
                otherwise it must already be a ``PostProcessor``.
            normalized: when true, the final result is passed through
                ``sk_normalize`` with ``norm='l1'`` along ``axis=1``.
            threads: forwarded to each level's ``predict_new``.

        Returns:
            The output of the last level that was run (optionally
            normalized), or ``csr_codes`` unchanged if no level runs.

        Raises:
            AssertionError: if ``cond_prob`` does not resolve to a
                ``PostProcessor`` or ``X`` has the wrong width.
        """
        if max_depth is None:
            max_depth = self.depth
        # ``==`` is kept (rather than ``is``) so that values comparing
        # equal to True/False keep selecting the defaults.
        if cond_prob is None or cond_prob == False:  # noqa: E712
            cond_prob = PostProcessor(Transform.identity, Combiner.noop)
        elif cond_prob == True:  # noqa: E712
            cond_prob = PostProcessor(Transform.get_lpsvm(3), Combiner.mul)
        # The assertion message reports the offending type.
        assert isinstance(cond_prob, PostProcessor), type(cond_prob)

        assert X.shape[1] == self.nr_features
        if self.bias > 0:
            X = smat_util.append_column(X, self.bias)
        pX = PyMatrix.init_from(X, dtype=self.model_chain[0].pW.dtype)
        max_depth = min(self.depth, max_depth)
        pred_csr = csr_codes
        for d in range(max_depth):
            cur_model = self.model_chain[d]
            # Intermediate levels keep a beam; only the last level
            # applies the caller's top-k.
            local_only_topk = only_topk if d == (max_depth - 1) else beam_size
            pred_csr = cur_model.predict_new(pX,
                                             only_topk=local_only_topk,
                                             csr_codes=pred_csr,
                                             cond_prob=cond_prob,
                                             threads=threads)
        if normalized:
            pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
        return pred_csr
示例#6
0
 def __init__(self, X, Y, C=None, bias=-1.0, dtype=None, Z_pred=None):
     """Wrap the inputs with ``PyMatrix.init_from``; optionally add ``Z_pred`` to ``Z``.

     Args:
         X: input stored as ``self.pX``; its ``dtype`` attribute is the
             fallback element type.
         Y: input stored as ``self.pY``.
         C: optional input stored as ``self.pC``.
         bias: stored as ``self.bias``; when positive, ``X`` is passed
             through ``smat_util.append_column(X, bias)`` before being
             wrapped.
         dtype: element type handed to every ``PyMatrix.init_from``
             call; ``X.dtype`` is used when it is ``None``.
         Z_pred: optional matrix added to ``Z = Y * C``; ignored when
             ``C`` is ``None``.
     """
     # The fallback dtype is read from the caller's X, before any bias
     # column is appended.
     dtype = X.dtype if dtype is None else dtype
     self.bias = bias
     if self.bias > 0:
         X = smat_util.append_column(X, self.bias)
     self.pX = PyMatrix.init_from(X, dtype)
     self.pY = PyMatrix.init_from(Y, dtype)
     self.pC = PyMatrix.init_from(C, dtype)
     # NOTE(review): self.Y / self.C are not assigned in this method;
     # presumably they are accessors defined on the class -- confirm.
     if C is None:
         Z = None
     else:
         Z = smat.csr_matrix(self.Y.dot(self.C))  # Z = Y * C
         if Z_pred is not None:
             print("Z", Z.shape)
             print("Z_pred", Z_pred.shape)
             Z = (Z + Z_pred).tocsr()
     self.pZ = PyMatrix.init_from(Z, dtype)
     self.dtype = dtype
示例#7
0
 def predict_values(self, X, inst_idx, label_idx, out=None, threads=-1):
     """Compute ``clib.sparse_inner_products`` for the given index pairs.

     Args:
         X: input whose second dimension must equal
             ``self.nr_features``; wrapped with ``self.pW.dtype``.
         inst_idx: index array, cast to ``uint32`` before the call.
         label_idx: index array, cast to ``uint32`` before the call.
         out: optional output buffer; when ``None`` a zero array shaped
             like ``inst_idx`` with ``self.pW.dtype`` is allocated.
         threads: forwarded to ``clib.sparse_inner_products``.

     Returns:
         Whatever ``clib.sparse_inner_products`` returns.
     """
     assert X.shape[1] == self.nr_features
     value_dtype = self.pW.dtype
     if out is None:
         out = sp.zeros(inst_idx.shape, dtype=value_dtype)
     wrapped_x = PyMatrix.init_from(X, dtype=value_dtype)
     rows = inst_idx.astype(sp.uint32)
     cols = label_idx.astype(sp.uint32)
     return clib.sparse_inner_products(wrapped_x,
                                       self.pW,
                                       rows,
                                       cols,
                                       out,
                                       threads=threads)
示例#8
0
 def __init__(self,
              X,
              Y,
              C=None,
              dtype=None,
              Z_pred=None,
              negative_sampling_scheme=None):
     """Wrap the inputs with ``PyMatrix.init_from`` and pick ``Z`` by scheme.

     Args:
         X: input stored as ``self.pX``; its ``dtype`` attribute is the
             fallback element type.
         Y: input stored as ``self.pY``.
         C: optional input stored as ``self.pC``.
         dtype: element type handed to every ``PyMatrix.init_from``
             call; ``X.dtype`` is used when it is ``None``.
         Z_pred: optional matrix used by schemes 0 and 2.
         negative_sampling_scheme: selects what is stored as
             ``self.pZ``:

             * ``0``: ``Z + Z_pred`` converted to CSR,
             * ``2``: ``Z_pred`` when it is not ``None``,
             * anything else (including ``None`` and ``1``):
               ``Z = Y * C`` unchanged (``None`` when ``C`` is ``None``).
     """
     if dtype is None:
         dtype = X.dtype
     self.pX = PyMatrix.init_from(X, dtype)
     self.pY = PyMatrix.init_from(Y, dtype)
     self.pC = PyMatrix.init_from(C, dtype)
     # NOTE(review): self.Y / self.C are not assigned in this method;
     # presumably they are accessors defined on the class -- confirm.
     Z = None if C is None else smat.csr_matrix(self.Y.dot(self.C))  # Z = Y * C
     if negative_sampling_scheme == 0:
         # NOTE(review): this addition is unguarded, so scheme 0 needs
         # both C and Z_pred to be supplied -- confirm with callers.
         Z = (Z + Z_pred).tocsr()
     elif negative_sampling_scheme == 2 and Z_pred is not None:
         Z = Z_pred
     self.pZ = PyMatrix.init_from(Z, dtype)
     self.dtype = dtype
示例#9
0
    def predict_new(self,
                    X,
                    only_topk=None,
                    csr_codes=None,
                    beam_size=2,
                    max_depth=None,
                    cond_prob=True,
                    normalized=False,
                    threads=-1):
        """Predict by running the models in ``self.model_chain`` in order.

        Each level's output is fed to the next level as ``csr_codes``.
        Every level but the last keeps ``beam_size`` entries; the last
        level uses ``only_topk``.

        Args:
            X: input wrapped with the dtype of the first model's ``pW``.
            only_topk: ``only_topk`` value used for the last level.
            csr_codes: initial codes handed to the first level.
            beam_size: ``only_topk`` value used for all earlier levels.
            max_depth: number of levels to run; defaults to, and is
                capped at, ``self.depth``.
            cond_prob: ``None``/``False`` selects
                ``PostProcessor(Transform.identity, Combiner.noop)``,
                ``True`` selects
                ``PostProcessor(Transform.get_lpsvm(3), Combiner.mul)``;
                otherwise it must already be a ``PostProcessor``.
            normalized: when true, the final result is passed through
                ``sk_normalize`` with ``norm='l1'`` along ``axis=1``.
            threads: forwarded to each level's ``predict_new``.

        Returns:
            The output of the last level that was run (optionally
            normalized), or ``csr_codes`` unchanged if no level runs.

        Raises:
            AssertionError: if ``cond_prob`` does not resolve to a
                ``PostProcessor``.
        """
        if max_depth is None:
            max_depth = self.depth
        # ``==`` is kept (rather than ``is``) so that values comparing
        # equal to True/False keep selecting the defaults.
        if cond_prob is None or cond_prob == False:  # noqa: E712
            cond_prob = PostProcessor(Transform.identity, Combiner.noop)
        elif cond_prob == True:  # noqa: E712
            cond_prob = PostProcessor(Transform.get_lpsvm(3), Combiner.mul)
        # The assertion message reports the offending type.
        assert isinstance(cond_prob, PostProcessor), type(cond_prob)

        pX = PyMatrix.init_from(X, dtype=self.model_chain[0].pW.dtype)
        max_depth = min(self.depth, max_depth)
        transform = cond_prob.transform if cond_prob else Transform.identity
        pred_csr = csr_codes
        for d in range(max_depth):
            cur_model = self.model_chain[d]
            # Intermediate levels keep a beam; only the last level
            # applies the caller's top-k.
            local_only_topk = only_topk if d == (max_depth - 1) else beam_size
            pred_csr = cur_model.predict_new(pX,
                                             only_topk=local_only_topk,
                                             csr_codes=pred_csr,
                                             transform=transform,
                                             cond_prob=cond_prob,
                                             threads=threads)
        if normalized:
            pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
        return pred_csr
示例#10
0
    def predict_new(self,
                    X,
                    only_topk=None,
                    transform=None,
                    csr_codes=None,
                    cond_prob=None,
                    normalized=False,
                    threads=-1):
        """Score ``X`` with this model, optionally restricted by ``csr_codes``.

        Args:
            X: input whose second dimension must equal
                ``self.nr_features``.
            only_topk: forwarded to the ``smat_util`` sorting helpers.
            transform: optional callable invoked as
                ``transform(values, inplace=True)`` on the raw scores.
            csr_codes: when ``None`` every column of ``self.W`` is scored
                via a dense product; otherwise a matrix of shape
                ``(X.shape[0], self.nr_codes)`` that is handed to
                ``clib.multilabel_predict_with_codes``.
            cond_prob: optional object whose ``combiner`` merges the
                scores with the values returned for the codes; only
                used when ``csr_codes`` is given.
            normalized: when true, the result is passed through
                ``sk_normalize`` with ``norm='l1'`` along ``axis=1``.
            threads: forwarded to ``clib.multilabel_predict_with_codes``.

        Returns:
            The prediction matrix produced by the ``smat_util`` sorting
            helpers (optionally normalized).
        """
        assert X.shape[1] == self.nr_features
        if csr_codes is not None:
            # Code-restricted scoring.
            assert self.C is not None, "This model does not have C"
            assert X.shape[1] == self.nr_features
            assert csr_codes.shape[0] == X.shape[0]
            assert csr_codes.shape[1] == self.nr_codes
            wrapped_codes = PyMatrix.init_from(csr_codes, self.dtype)
            csr_labels, scored = clib.multilabel_predict_with_codes(
                X, self.pW, self.pC, wrapped_codes, threads=threads)
            scores = scored.data
            if transform:
                scores = transform(scores, inplace=True)
            if cond_prob:
                scores[:] = cond_prob.combiner(scores, csr_labels.data)
            pred_csr = smat_util.sorted_csr(scored, only_topk=only_topk)
        else:
            # No codes: score every column through a dense product.
            scores = X.dot(self.W).toarray()
            if transform:
                scores = transform(scores, inplace=True)
            triplets = smat_util.dense_to_coo(scores)
            pred_csr = smat_util.sorted_csr_from_coo(triplets.shape,
                                                     triplets.row,
                                                     triplets.col,
                                                     triplets.data,
                                                     only_topk=only_topk)

        if not normalized:
            return pred_csr
        return sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
示例#11
0
文件: indexer.py 项目: xingz9/X-BERT
 def __init__(self, feat_mat):
     """Store ``feat_mat`` wrapped by ``PyMatrix.init_from`` as ``self.py_feat_mat``."""
     wrapped = PyMatrix.init_from(feat_mat)
     self.py_feat_mat = wrapped