Пример #1
0
 def __init__(self, X, Y, C=None, dtype=None):
     """Wrap the problem matrices as PyMatrix objects.

     Args:
         X: feature matrix; ``X.dtype`` is used when ``dtype`` is None.
         Y: label matrix.
         C: optional matrix multiplied with Y to build Z; may be None.
         dtype: element type forwarded to ``PyMatrix.init_from``.

     Sets ``pX``, ``pY``, ``pC``, ``pZ`` and ``dtype`` on the instance.
     """
     elem_type = X.dtype if dtype is None else dtype
     self.pX = PyMatrix.init_from(X, elem_type)
     self.pY = PyMatrix.init_from(Y, elem_type)
     self.pC = PyMatrix.init_from(C, elem_type)
     # Z = Y * C exists only when a C matrix was supplied.
     # NOTE(review): self.Y / self.C are not assigned here - presumably
     # properties exposing pY / pC; confirm against the class definition.
     if C is None:
         code_mat = None
     else:
         code_mat = smat.csr_matrix(self.Y.dot(self.C))
     self.pZ = PyMatrix.init_from(code_mat, elem_type)
     self.dtype = elem_type
Пример #2
0
 def __init__(self, W, C=None, dtype=None):
     """Store W and the optional C as PyMatrix objects.

     Args:
         W: weight matrix; ``W.dtype`` is used when ``dtype`` is None.
         C: optional matrix, either a PyMatrix or any object with
             ``.shape``; its row count must equal ``W.shape[1]``.
         dtype: element type forwarded to ``PyMatrix.init_from``.

     Raises:
         AssertionError: if C is given and its first dimension does not
             match the second dimension of W.
     """
     if C is not None:
         if not isinstance(C, PyMatrix):
             assert C.shape[0] == W.shape[1], 'C:{} W:{}'.format(
                 C.shape, W.shape)
         else:
             # A PyMatrix keeps the wrapped matrix in ``buf``.
             assert C.buf.shape[0] == W.shape[1]
     elem_type = W.dtype if dtype is None else dtype
     self.pC = PyMatrix.init_from(C, elem_type)
     self.pW = PyMatrix.init_from(W, elem_type)
Пример #3
0
 def __init__(self, X, Y, C=None, bias=-1.0, dtype=None):
     """Wrap the problem matrices, optionally appending a bias column to X.

     Args:
         X: feature matrix; ``X.dtype`` (read before any bias column is
             appended) is used when ``dtype`` is None.
         Y: label matrix.
         C: optional matrix multiplied with Y to build Z; may be None.
         bias: when greater than 0, passed with X to
             ``smat_util.append_column``; otherwise X is used unchanged.
         dtype: element type forwarded to ``PyMatrix.init_from``.
     """
     elem_type = X.dtype if dtype is None else dtype
     self.bias = bias
     if self.bias > 0:
         X = smat_util.append_column(X, self.bias)
     self.pX = PyMatrix.init_from(X, elem_type)
     self.pY = PyMatrix.init_from(Y, elem_type)
     self.pC = PyMatrix.init_from(C, elem_type)
     # Z = Y * C exists only when a C matrix was supplied.
     # NOTE(review): self.Y / self.C are not assigned here - presumably
     # properties exposing pY / pC; confirm against the class definition.
     if C is None:
         code_mat = None
     else:
         code_mat = smat.csr_matrix(self.Y.dot(self.C))
     self.pZ = PyMatrix.init_from(code_mat, elem_type)
     self.dtype = elem_type
Пример #4
0
    def predict_new(
        self,
        X,
        only_topk=None,
        csr_codes=None,
        cond_prob=None,
        normalized=False,
        threads=-1,
    ):
        """Score X with this model and return the predictions as a CSR matrix.

        Args:
            X: input whose second dimension must equal ``self.nr_features``.
            only_topk: forwarded to the ``smat_util`` sorting helpers.
            csr_codes: optional CSR matrix of shape
                ``(X.shape[0], self.nr_codes)``. When None, ``X.dot(self.W)``
                is densified and scored directly; otherwise the prediction is
                delegated to ``clib.multilabel_predict_with_codes``.
            cond_prob: optional object providing ``transform`` and
                ``combiner``; skipped when falsy.
            normalized: when true, rows are L1-normalised via ``sk_normalize``.
            threads: forwarded to the C library call.

        Returns:
            The prediction matrix produced by the ``smat_util`` sorting
            helpers (optionally normalised).
        """
        assert X.shape[1] == self.nr_features
        if csr_codes is not None:
            assert self.C is not None, "This model does not have C"
            assert csr_codes.shape[0] == X.shape[0]
            assert csr_codes.shape[1] == self.nr_codes
            if not csr_codes.has_sorted_indices:
                csr_codes = csr_codes.sorted_indices()

            has_explicit_zeros = (csr_codes.data == 0).sum() != 0
            if has_explicit_zeros:
                # this is a trick to avoid zero entries explicit removal from
                # the smat_dot_smat: shift every stored value away from zero
                # and undo the shift on the returned labels below.
                offset = sp.absolute(csr_codes.data).max() + 1
                codes_for_clib = smat.csr_matrix(
                    (csr_codes.data + offset, csr_codes.indices,
                     csr_codes.indptr),
                    shape=csr_codes.shape,
                )
            else:
                codes_for_clib = csr_codes.sorted_indices()

            pZ = PyMatrix.init_from(codes_for_clib, self.dtype)
            csr_labels, pred_csr = clib.multilabel_predict_with_codes(
                X, self.pW, self.pC, pZ, threads=threads)
            if has_explicit_zeros:
                csr_labels.data -= offset

            val = pred_csr.data
            if cond_prob:
                val = cond_prob.transform(val, inplace=True)
                # NOTE(review): the combiner result is only bound to the
                # local name; this relies on the combiner updating its first
                # argument in place - confirm.
                val = cond_prob.combiner(val, csr_labels.data)

            pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)
        else:
            scores = X.dot(self.W).toarray()
            if cond_prob:
                scores = cond_prob.transform(scores, inplace=True)
            coo = smat_util.dense_to_coo(scores)
            pred_csr = smat_util.sorted_csr_from_coo(
                coo.shape, coo.row, coo.col, coo.data, only_topk=only_topk)

        if not normalized:
            return pred_csr
        return sk_normalize(pred_csr, axis=1, copy=False, norm="l1")
Пример #5
0
    def predict_new(self,
                    X,
                    only_topk=None,
                    csr_codes=None,
                    beam_size=2,
                    max_depth=None,
                    cond_prob=True,
                    normalized=False,
                    threads=-1):
        """Predict by running X through the model chain level by level.

        Args:
            X: input whose second dimension must equal ``self.nr_features``
                (checked before any bias column is appended).
            only_topk: passed to the last level evaluated.
            csr_codes: initial codes handed to the first level; may be None.
            beam_size: used in place of ``only_topk`` on every level except
                the last one evaluated.
            max_depth: number of levels to evaluate; defaults to and is
                capped at ``self.depth``.
            cond_prob: ``None``/``False`` selects an identity/no-op
                PostProcessor, ``True`` selects ``Transform.get_lpsvm(3)``
                with ``Combiner.mul``; a PostProcessor instance is used as is.
            normalized: when true, rows of the result are L1-normalised.
            threads: forwarded to every level's ``predict_new``.

        Returns:
            The prediction of the last evaluated level (optionally
            normalised).

        Raises:
            AssertionError: if ``cond_prob`` does not resolve to a
                PostProcessor, or the feature count of X does not match.
        """
        if max_depth is None:
            max_depth = self.depth
        # ``==`` (not ``is``) is kept so that values comparing equal to the
        # booleans are still accepted, as before.
        if cond_prob is None or cond_prob == False:
            cond_prob = PostProcessor(Transform.identity, Combiner.noop)
        elif cond_prob == True:
            cond_prob = PostProcessor(Transform.get_lpsvm(3), Combiner.mul)
        # The message used to reference the misspelled name ``tpye``, which
        # turned a failing check into a NameError.
        assert isinstance(cond_prob, PostProcessor), type(cond_prob)

        assert X.shape[1] == self.nr_features
        if self.bias > 0:
            X = smat_util.append_column(X, self.bias)
        pX = PyMatrix.init_from(X, dtype=self.model_chain[0].pW.dtype)
        max_depth = min(self.depth, max_depth)
        pred_csr = csr_codes
        for d in range(max_depth):
            cur_model = self.model_chain[d]
            # Intermediate levels keep ``beam_size`` candidates; only the
            # final level honours the caller's ``only_topk``.
            local_only_topk = only_topk if d == (max_depth - 1) else beam_size
            pred_csr = cur_model.predict_new(pX,
                                             only_topk=local_only_topk,
                                             csr_codes=pred_csr,
                                             cond_prob=cond_prob,
                                             threads=threads)
        if normalized:
            pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
        return pred_csr
Пример #6
0
 def __init__(self, X, Y, C=None, bias=-1.0, dtype=None, Z_pred=None):
     """Wrap the problem matrices, optionally merging predicted codes into Z.

     Args:
         X: feature matrix; ``X.dtype`` (read before any bias column is
             appended) is used when ``dtype`` is None.
         Y: label matrix.
         C: optional matrix multiplied with Y to build Z; may be None.
         bias: when greater than 0, passed with X to
             ``smat_util.append_column``.
         dtype: element type forwarded to ``PyMatrix.init_from``.
         Z_pred: optional matrix added to Z when both are available.
     """
     elem_type = X.dtype if dtype is None else dtype
     self.bias = bias
     if self.bias > 0:
         X = smat_util.append_column(X, self.bias)
     self.pX = PyMatrix.init_from(X, elem_type)
     self.pY = PyMatrix.init_from(Y, elem_type)
     self.pC = PyMatrix.init_from(C, elem_type)

     # Z = Y * C; without C there is no Z and Z_pred is ignored.
     # NOTE(review): self.Y / self.C are not assigned here - presumably
     # properties exposing pY / pC; confirm against the class definition.
     code_mat = None
     if C is not None:
         code_mat = smat.csr_matrix(self.Y.dot(self.C))
         if Z_pred is not None:
             print("Z", code_mat.shape)
             print("Z_pred", Z_pred.shape)
             code_mat = (code_mat + Z_pred).tocsr()
     self.pZ = PyMatrix.init_from(code_mat, elem_type)
     self.dtype = elem_type
Пример #7
0
 def balaced_ordinal_gen(self, kdim, depth, seed, threads=-1):
     """Generate one uint32 code per row of ``self.feat_mat``.

     The feature matrix is multiplied by a random matrix with ``depth``
     columns and the result is handed to ``clib.get_codes`` with the
     ``Indexer.KDTREE_CYCLIC`` option.

     Args:
         kdim: must survive the ``2**log2(kdim)`` round trip checked by the
             assertion (i.e. it is expected to be a power of two).
         depth: number of random projection columns; the depth passed to
             ``clib.get_codes`` is ``depth * int(log2(kdim))``.
         seed: seeds ``sp.random`` (global state) and is also passed to
             ``clib.get_codes``.
         threads: forwarded to ``clib.get_codes``.

     Returns:
         The ``codes`` array that was passed to ``clib.get_codes``.
     """
     log2_kdim = sp.log2(kdim)
     assert int(2**log2_kdim) == kdim
     sp.random.seed(seed)
     projection = sp.randn(self.feat_mat.shape[1], depth)
     projected = PyMatrix(self.feat_mat.dot(projection))
     codes = sp.zeros(projected.rows, dtype=sp.uint32)
     clib.get_codes(projected,
                    depth * int(log2_kdim),
                    Indexer.KDTREE_CYCLIC,
                    seed,
                    codes,
                    threads=threads)
     return codes
Пример #8
0
 def predict_values(self, X, inst_idx, label_idx, out=None, threads=-1):
     """Evaluate ``clib.sparse_inner_products`` for the given index pairs.

     Args:
         X: input whose second dimension must equal ``self.nr_features``.
         inst_idx: index array; cast to uint32 before the call.
         label_idx: index array; cast to uint32 before the call.
         out: optional output buffer; when None a zero array shaped like
             ``inst_idx`` with the dtype of ``self.pW`` is allocated.
         threads: forwarded to the C library call.

     Returns:
         Whatever ``clib.sparse_inner_products`` returns.
     """
     assert X.shape[1] == self.nr_features
     if out is None:
         out = sp.zeros(inst_idx.shape, dtype=self.pW.dtype)
     pX = PyMatrix.init_from(X, dtype=self.pW.dtype)
     row_ids = inst_idx.astype(sp.uint32)
     col_ids = label_idx.astype(sp.uint32)
     return clib.sparse_inner_products(
         pX, self.pW, row_ids, col_ids, out, threads=threads)
Пример #9
0
 def __init__(self,
              X,
              Y,
              C=None,
              dtype=None,
              Z_pred=None,
              negative_sampling_scheme=None):
     """Wrap the problem matrices and choose Z per the sampling scheme.

     Args:
         X: feature matrix; ``X.dtype`` is used when ``dtype`` is None.
         Y: label matrix.
         C: optional matrix multiplied with Y to build Z; may be None.
         dtype: element type forwarded to ``PyMatrix.init_from``.
         Z_pred: optional alternative/additional code matrix.
         negative_sampling_scheme: selects how Z is formed:
             ``0`` - ``Y * C + Z_pred`` when both operands are available,
             otherwise ``Y * C`` unchanged;
             ``2`` - ``Z_pred`` when it is given, otherwise ``Y * C``;
             anything else (including ``None`` and ``1``) - ``Y * C``.
     """
     if dtype is None:
         dtype = X.dtype
     self.pX = PyMatrix.init_from(X, dtype)
     self.pY = PyMatrix.init_from(Y, dtype)
     self.pC = PyMatrix.init_from(C, dtype)
     # NOTE(review): self.Y / self.C are not assigned here - presumably
     # properties exposing pY / pC; confirm against the class definition.
     Z = None if C is None else smat.csr_matrix(self.Y.dot(self.C))
     if negative_sampling_scheme == 0:
         # Guard both operands: adding None used to raise when either C or
         # Z_pred was missing.
         if Z is not None and Z_pred is not None:
             Z = (Z + Z_pred).tocsr()
     elif negative_sampling_scheme == 2 and Z_pred is not None:
         Z = Z_pred
     # Every other scheme keeps Z = Y * C.
     self.pZ = PyMatrix.init_from(Z, dtype)
     self.dtype = dtype
Пример #10
0
    def predict_new(self,
                    X,
                    only_topk=None,
                    csr_codes=None,
                    beam_size=2,
                    max_depth=None,
                    cond_prob=True,
                    normalized=False,
                    threads=-1):
        """Predict by running X through the model chain level by level.

        Args:
            X: input converted with ``PyMatrix.init_from`` using the dtype
                of the first model's ``pW``.
            only_topk: passed to the last level evaluated.
            csr_codes: initial codes handed to the first level; may be None.
            beam_size: used in place of ``only_topk`` on every level except
                the last one evaluated.
            max_depth: number of levels to evaluate; defaults to and is
                capped at ``self.depth``.
            cond_prob: ``None``/``False`` selects an identity/no-op
                PostProcessor, ``True`` selects ``Transform.get_lpsvm(3)``
                with ``Combiner.mul``; a PostProcessor instance is used as is.
            normalized: when true, rows of the result are L1-normalised.
            threads: forwarded to every level's ``predict_new``.

        Returns:
            The prediction of the last evaluated level (optionally
            normalised).

        Raises:
            AssertionError: if ``cond_prob`` does not resolve to a
                PostProcessor.
        """
        if max_depth is None:
            max_depth = self.depth
        # ``==`` (not ``is``) is kept so that values comparing equal to the
        # booleans are still accepted, as before.
        if cond_prob is None or cond_prob == False:
            cond_prob = PostProcessor(Transform.identity, Combiner.noop)
        elif cond_prob == True:
            cond_prob = PostProcessor(Transform.get_lpsvm(3), Combiner.mul)
        # The message used to reference the misspelled name ``tpye``, which
        # turned a failing check into a NameError.
        assert isinstance(cond_prob, PostProcessor), type(cond_prob)

        pX = PyMatrix.init_from(X, dtype=self.model_chain[0].pW.dtype)
        max_depth = min(self.depth, max_depth)
        transform = cond_prob.transform if cond_prob else Transform.identity
        pred_csr = csr_codes
        for d in range(max_depth):
            cur_model = self.model_chain[d]
            # Intermediate levels keep ``beam_size`` candidates; only the
            # final level honours the caller's ``only_topk``.
            local_only_topk = only_topk if d == (max_depth - 1) else beam_size
            pred_csr = cur_model.predict_new(pX,
                                             only_topk=local_only_topk,
                                             csr_codes=pred_csr,
                                             transform=transform,
                                             cond_prob=cond_prob,
                                             threads=threads)
        if normalized:
            pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
        return pred_csr
Пример #11
0
    def predict_new(self,
                    X,
                    only_topk=None,
                    transform=None,
                    csr_codes=None,
                    cond_prob=None,
                    normalized=False,
                    threads=-1):
        """Score X with this model and return the predictions as a CSR matrix.

        Args:
            X: input whose second dimension must equal ``self.nr_features``.
            only_topk: forwarded to the ``smat_util`` sorting helpers.
            transform: optional callable applied to the raw scores with
                ``inplace=True``; skipped when falsy.
            csr_codes: optional matrix of shape
                ``(X.shape[0], self.nr_codes)``. When None, ``X.dot(self.W)``
                is densified and scored directly; otherwise the prediction is
                delegated to ``clib.multilabel_predict_with_codes``.
            cond_prob: optional object whose ``combiner`` merges the scores
                with the returned label data; only used with ``csr_codes``.
            normalized: when true, rows are L1-normalised via ``sk_normalize``.
            threads: forwarded to the C library call.

        Returns:
            The prediction matrix produced by the ``smat_util`` sorting
            helpers (optionally normalised).
        """
        assert X.shape[1] == self.nr_features
        if csr_codes is not None:
            assert self.C is not None, "This model does not have C"
            assert csr_codes.shape[0] == X.shape[0]
            assert csr_codes.shape[1] == self.nr_codes
            pZ = PyMatrix.init_from(csr_codes, self.dtype)
            csr_labels, pred_csr = clib.multilabel_predict_with_codes(
                X, self.pW, self.pC, pZ, threads=threads)
            scores = pred_csr.data
            if transform:
                scores = transform(scores, inplace=True)
            if cond_prob:
                # Slice assignment stores the combined values in the array
                # that ``scores`` currently refers to.
                scores[:] = cond_prob.combiner(scores, csr_labels.data)

            pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)
        else:
            scores = X.dot(self.W).toarray()
            if transform:
                scores = transform(scores, inplace=True)
            coo = smat_util.dense_to_coo(scores)
            pred_csr = smat_util.sorted_csr_from_coo(
                coo.shape, coo.row, coo.col, coo.data, only_topk=only_topk)

        if not normalized:
            return pred_csr
        return sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
Пример #12
0
 def __init__(self, feat_mat):
     """Keep a ``PyMatrix.init_from`` wrapper of ``feat_mat`` on the instance."""
     wrapped = PyMatrix.init_from(feat_mat)
     self.py_feat_mat = wrapped
Пример #13
0
def test_svm(datafolder='dataset/Eurlex-4K', depth=3):
    """Train three model variants on one dataset and print their metrics.

    The data is loaded with ``Data.load`` and every model is evaluated on
    the same matrix it was trained on. The three runs are:

    1. multi-label with codes (``MLProblem(X, Y, C)`` + ``MLModel.train``),
    2. hierarchical multi-label (``ml_train(..., hierarchical=True)``),
    3. pure one-vs-rest multi-label (``MLProblem(X, Y)``).

    Args:
        datafolder: dataset location passed to ``Data.load``.
        depth: depth passed to ``Data.load``.
    """
    data = Data.load(datafolder, depth=depth)
    X = PyMatrix(data.X, dtype=data.X.dtype)
    Y = data.Y
    C = data.C
    only_topk = 20  # number of predictions kept per instance
    topk = 10  # cut-off passed to Metrics.generate
    Cp = 1
    Cn = 1
    threshold = 0.01
    solver_type = L2R_L2LOSS_SVC_DUAL

    # test multi-label with codes
    prob = MLProblem(X, Y, C)
    m = MLModel.train(prob,
                      threshold=threshold,
                      solver_type=solver_type,
                      Cp=Cp,
                      Cn=Cn)
    pred_Y = m.predict(X, only_topk=only_topk)
    print('sparse W with top {}'.format(topk))
    metric = Metrics.generate(Y, pred_Y, topk)
    print(metric)

    # test hierarchical multi-label
    print('Hierarchical-Multilabel')
    beam_size = 4
    min_labels = 2
    m = ml_train(prob,
                 hierarchical=True,
                 min_labels=min_labels,
                 threshold=threshold,
                 solver_type=solver_type,
                 Cp=Cp,
                 Cn=Cn)
    print('m.depth = {}'.format(m.depth))
    pred_Y = m.predict(X, beam_size=beam_size, only_topk=only_topk)
    print(pred_Y.shape)
    print('sparse W with top {}'.format(topk))
    metric = Metrics.generate(Y, pred_Y, topk)
    print(metric)

    # test pure multi-label
    print('pure one-vs-rest Multi-label')
    prob = MLProblem(X, Y)
    m = MLModel.train(prob,
                      threshold=threshold,
                      solver_type=solver_type,
                      Cp=Cp,
                      Cn=Cn)
    pred_Y = m.predict(X, only_topk=only_topk)
    metric = Metrics.generate(Y, pred_Y, topk)
    print(metric)
    print('|W|^2 = {}'.format((m.W.toarray() * m.W.toarray()).sum()))