def predict(
    self,
    X,
    only_topk=None,
    csr_codes=None,
    cond_prob=None,
    normalized=False,
    threads=-1,
):
    """Score the instances in ``X`` and return the predictions as a CSR matrix.

    Two modes are supported:

    * ``csr_codes is None``: every column of ``self.W`` is scored through a
      dense ``X.dot(self.W)`` product.
    * ``csr_codes`` given: the candidate (instance, label) pairs are the
      stored entries of ``csr_codes.dot(self.C.T)`` and only those pairs are
      scored, via ``self.predict_values``.

    Args:
        X: feature matrix with ``self.nr_features`` columns.
        only_topk: forwarded to ``smat_util.sorted_csr_from_coo``
            (presumably the number of entries kept per row -- confirm there).
        csr_codes: optional CSR matrix of shape ``(X.shape[0], self.nr_codes)``.
        cond_prob: optional object exposing ``transform(values, inplace=True)``
            and ``combiner(values, code_values)``; skipped when falsy.
        normalized: when truthy, the rows of the result are L1-normalized
            through ``sk_normalize``.
        threads: forwarded to ``self.predict_values``.

    Returns:
        The CSR matrix built by ``smat_util.sorted_csr_from_coo``, optionally
        row-normalized.
    """
    assert X.shape[1] == self.nr_features

    if csr_codes is not None:
        assert self.C is not None, "This model does not have C"
        assert csr_codes.shape[0] == X.shape[0]
        assert csr_codes.shape[1] == self.nr_codes

        # Per the original author's note, explicitly stored zeros would be
        # removed by the sparse-sparse product.  Shift every stored value by
        # a positive offset before the product and subtract it afterwards.
        # NOTE(review): the subtraction restores the original values only if
        # each product entry is one code value times a unit weight of C.
        has_explicit_zero = (csr_codes.data == 0).any()
        if has_explicit_zero:
            offset = sp.absolute(csr_codes.data).max() + 1
            csr_codes = smat.csr_matrix(
                (csr_codes.data + offset, csr_codes.indices, csr_codes.indptr),
                shape=csr_codes.shape,
            )
        candidates = csr_codes.dot(self.C.T).tocsr()
        if has_explicit_zero:
            candidates.data -= offset

        # Expand the CSR row pointer into one row index per stored entry.
        row_nnz = candidates.indptr[1:] - candidates.indptr[:-1]
        rows = sp.repeat(sp.arange(X.shape[0], dtype=sp.uint32), row_nnz)
        cols = candidates.indices.astype(sp.uint32)

        scores = self.predict_values(X, rows, cols, threads=threads)
        if cond_prob:
            scores = cond_prob.transform(scores, inplace=True)
            scores = cond_prob.combiner(scores, candidates.data)
        pred_csr = smat_util.sorted_csr_from_coo(
            candidates.shape, rows, cols, scores, only_topk=only_topk
        )
    else:
        all_scores = X.dot(self.W).toarray()
        if cond_prob:
            all_scores = cond_prob.transform(all_scores, inplace=True)
        as_coo = smat_util.dense_to_coo(all_scores)
        pred_csr = smat_util.sorted_csr_from_coo(
            as_coo.shape, as_coo.row, as_coo.col, as_coo.data, only_topk=only_topk
        )

    if normalized:
        pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm="l1")
    return pred_csr
def predict_with_coo_labels(self, X, inst_idx, label_idx, only_topk=None):
    """Score explicit (instance, label) pairs and return them as a CSR matrix.

    Args:
        X: feature matrix; its row count defines the number of output rows.
        inst_idx: row index of each pair to score.
        label_idx: label (column) index of each pair to score.
        only_topk: forwarded to ``smat_util.sorted_csr_from_coo``.

    Returns:
        The matrix of shape ``(X.shape[0], self.nr_labels)`` produced by
        ``smat_util.sorted_csr_from_coo`` from the scored pairs.
    """
    scores = self.predict_values(X, inst_idx, label_idx)
    out_shape = (X.shape[0], self.nr_labels)
    return smat_util.sorted_csr_from_coo(
        out_shape, inst_idx, label_idx, scores, only_topk=only_topk
    )
def predict_new(
    self,
    X,
    only_topk=None,
    csr_codes=None,
    cond_prob=None,
    normalized=False,
    threads=-1,
):
    """Score the instances in ``X`` using the native code-restricted kernel.

    Two modes are supported:

    * ``csr_codes is None``: every column of ``self.W`` is scored through a
      dense ``X.dot(self.W)`` product.
    * ``csr_codes`` given: scoring is delegated to
      ``clib.multilabel_predict_with_codes``, which returns both the candidate
      label matrix and the scored prediction matrix.

    Args:
        X: feature matrix with ``self.nr_features`` columns.
        only_topk: forwarded to the ``smat_util`` sorting helpers
            (presumably the number of entries kept per row -- confirm there).
        csr_codes: optional CSR matrix of shape ``(X.shape[0], self.nr_codes)``.
        cond_prob: optional object exposing ``transform(values, inplace=True)``
            and ``combiner(values, code_values)``; skipped when falsy.
        normalized: when truthy, the rows of the result are L1-normalized
            through ``sk_normalize``.
        threads: forwarded to ``clib.multilabel_predict_with_codes``.

    Returns:
        The sorted CSR prediction matrix, optionally row-normalized.
    """
    assert X.shape[1] == self.nr_features

    if csr_codes is None:
        dense = X.dot(self.W).toarray()
        if cond_prob:
            dense = cond_prob.transform(dense, inplace=True)
        coo = smat_util.dense_to_coo(dense)
        pred_csr = smat_util.sorted_csr_from_coo(
            coo.shape, coo.row, coo.col, coo.data, only_topk=only_topk
        )
    else:
        assert self.C is not None, "This model does not have C"
        assert csr_codes.shape[0] == X.shape[0]
        assert csr_codes.shape[1] == self.nr_codes

        # Sort the column indices once here; no second sorted_indices() copy
        # is needed before the matrix is handed to the native kernel.
        if not csr_codes.has_sorted_indices:
            csr_codes = csr_codes.sorted_indices()

        # Per the original author's note, explicitly stored zeros would be
        # removed by the sparse-sparse product.  Shift every stored value by
        # a positive offset before the product and subtract it afterwards.
        # NOTE(review): the subtraction restores the original values only if
        # each product entry is one code value times a unit weight of C.
        has_explicit_zero = (csr_codes.data == 0).any()
        if has_explicit_zero:
            offset = sp.absolute(csr_codes.data).max() + 1
            csr_codes = smat.csr_matrix(
                (csr_codes.data + offset, csr_codes.indices, csr_codes.indptr),
                shape=csr_codes.shape,
            )

        pZ = PyMatrix.init_from(csr_codes, self.dtype)
        csr_labels, pred_csr = clib.multilabel_predict_with_codes(
            X, self.pW, self.pC, pZ, threads=threads
        )
        if has_explicit_zero:
            csr_labels.data -= offset

        if cond_prob:
            val = cond_prob.transform(pred_csr.data, inplace=True)
            # Store the combined scores back into the prediction matrix.
            # Previously the combiner's return value was bound to a local and
            # dropped, so it only took effect if the combiner happened to
            # mutate its first argument in place.
            pred_csr.data[:] = cond_prob.combiner(val, csr_labels.data)
        pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)

    if normalized:
        pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm="l1")
    return pred_csr
def predict(
    self,
    X,
    only_topk=None,
    transform=None,
    csr_codes=None,
    cond_prob=None,
    normalized=False,
    threads=-1,
):
    """Score the instances in ``X`` and return the predictions as a CSR matrix.

    Two modes are supported:

    * ``csr_codes is None``: every column of ``self.W`` is scored through a
      dense ``X.dot(self.W)`` product.
    * ``csr_codes`` given: the candidate (instance, label) pairs are the
      stored entries of ``csr_codes.dot(self.C.T)`` and only those pairs are
      scored, via ``self.predict_values``.

    Args:
        X: feature matrix with ``self.nr_features`` columns.
        only_topk: forwarded to the ``smat_util`` sorting helpers
            (presumably the number of entries kept per row -- confirm there).
        transform: optional callable invoked as ``transform(values,
            inplace=True)`` on the raw scores; skipped when falsy.
        csr_codes: optional CSR matrix of shape ``(X.shape[0], self.nr_codes)``.
        cond_prob: optional object whose ``combiner(values, code_values)``
            result overwrites the scores; only used when ``csr_codes`` is given.
        normalized: when truthy, the rows of the result are L1-normalized
            through ``sk_normalize``.
        threads: forwarded to ``self.predict_values``.

    Returns:
        The sorted CSR prediction matrix, optionally row-normalized.
    """
    assert X.shape[1] == self.nr_features

    if csr_codes is not None:
        assert self.C is not None, "This model does not have C"
        assert csr_codes.shape[0] == X.shape[0]
        assert csr_codes.shape[1] == self.nr_codes

        candidates = csr_codes.dot(self.C.T).tocsr()
        row_ptr = candidates.indptr
        # Expand the CSR row pointer into one row index per stored entry.
        rows = sp.repeat(
            sp.arange(X.shape[0], dtype=sp.uint32), row_ptr[1:] - row_ptr[:-1]
        )
        cols = candidates.indices.astype(sp.uint32)

        scores = self.predict_values(X, rows, cols, threads=threads)
        if transform:
            scores = transform(scores, inplace=True)
        if cond_prob:
            scores[:] = cond_prob.combiner(scores, candidates.data)

        unsorted = smat.csr_matrix((scores, cols, row_ptr), shape=candidates.shape)
        pred_csr = smat_util.sorted_csr(unsorted, only_topk=only_topk)
    else:
        all_scores = X.dot(self.W).toarray()
        if transform:
            all_scores = transform(all_scores, inplace=True)
        as_coo = smat_util.dense_to_coo(all_scores)
        pred_csr = smat_util.sorted_csr_from_coo(
            as_coo.shape, as_coo.row, as_coo.col, as_coo.data, only_topk=only_topk
        )

    if normalized:
        pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
    return pred_csr
def predict_new(
    self,
    X,
    only_topk=None,
    transform=None,
    csr_codes=None,
    cond_prob=None,
    normalized=False,
    threads=-1,
):
    """Score the instances in ``X`` using the native code-restricted kernel.

    Two modes are supported:

    * ``csr_codes is None``: every column of ``self.W`` is scored through a
      dense ``X.dot(self.W)`` product.
    * ``csr_codes`` given: scoring is delegated to
      ``clib.multilabel_predict_with_codes``, which returns both the candidate
      label matrix and the scored prediction matrix.

    Args:
        X: feature matrix with ``self.nr_features`` columns.
        only_topk: forwarded to the ``smat_util`` sorting helpers
            (presumably the number of entries kept per row -- confirm there).
        transform: optional callable invoked as ``transform(values,
            inplace=True)`` on the raw scores; skipped when falsy.
        csr_codes: optional matrix of shape ``(X.shape[0], self.nr_codes)``.
        cond_prob: optional object whose ``combiner(values, code_values)``
            result overwrites the scores; only used when ``csr_codes`` is given.
        normalized: when truthy, the rows of the result are L1-normalized
            through ``sk_normalize``.
        threads: forwarded to ``clib.multilabel_predict_with_codes``.

    Returns:
        The sorted CSR prediction matrix, optionally row-normalized.
    """
    assert X.shape[1] == self.nr_features

    if csr_codes is not None:
        assert self.C is not None, "This model does not have C"
        assert csr_codes.shape[0] == X.shape[0]
        assert csr_codes.shape[1] == self.nr_codes

        codes_handle = PyMatrix.init_from(csr_codes, self.dtype)
        candidates, pred_csr = clib.multilabel_predict_with_codes(
            X, self.pW, self.pC, codes_handle, threads=threads
        )

        scores = pred_csr.data
        if transform:
            scores = transform(scores, inplace=True)
        if cond_prob:
            scores[:] = cond_prob.combiner(scores, candidates.data)
        pred_csr = smat_util.sorted_csr(pred_csr, only_topk=only_topk)
    else:
        all_scores = X.dot(self.W).toarray()
        if transform:
            all_scores = transform(all_scores, inplace=True)
        as_coo = smat_util.dense_to_coo(all_scores)
        pred_csr = smat_util.sorted_csr_from_coo(
            as_coo.shape, as_coo.row, as_coo.col, as_coo.data, only_topk=only_topk
        )

    if normalized:
        pred_csr = sk_normalize(pred_csr, axis=1, copy=False, norm='l1')
    return pred_csr
def sorted_csr(csr, only_topk=None):
    """Rebuild ``csr`` through ``smat_util.sorted_csr_from_coo``.

    The CSR row pointer is expanded into an explicit row index per stored
    entry so the matrix can be passed on in coordinate form.

    Args:
        csr: a ``scipy.sparse.csr_matrix`` (enforced with an assertion).
        only_topk: forwarded to ``smat_util.sorted_csr_from_coo``.

    Returns:
        Whatever ``smat_util.sorted_csr_from_coo`` returns for the entries
        of ``csr``.
    """
    assert isinstance(csr, smat.csr_matrix)
    n_rows = csr.shape[0]
    nnz_per_row = csr.indptr[1:] - csr.indptr[:-1]
    row_of_entry = sp.repeat(sp.arange(n_rows, dtype=sp.uint32), nnz_per_row)
    return smat_util.sorted_csr_from_coo(
        csr.shape, row_of_entry, csr.indices, csr.data, only_topk
    )