def _compute_lower_bound(self, log_resp, log_prob_norm, counts):
    """Estimate the lower bound of the model.

    The lower bound on the likelihood (of the training data with respect
    to the model) is used to detect convergence and has to increase at
    each iteration.

    Parameters
    ----------
    log_resp : array, shape (n_samples, n_components)
        Logarithm of the posterior probabilities (or responsibilities)
        of each sample in X.

    log_prob_norm : float
        Logarithm of the probability of each sample in X.

    counts : array, shape (n_samples,)
        Weight (number of occurrences) of each sample in X.

    Returns
    -------
    lower_bound : float
    """
    # Contrary to the original formula, we have done some simplification
    # and removed all the constant terms.
    n_features, = self.mean_prior_.shape

    # We removed `.5 * n_features * np.log(self.degrees_of_freedom_)`
    # because the precision matrix is normalized.
    log_det_precisions_chol = (_compute_log_det_cholesky(
        self.precisions_cholesky_, self.covariance_type, n_features) -
        .5 * n_features * np.log(self.degrees_of_freedom_))

    if self.covariance_type == 'tied':
        log_wishart = self.n_components * np.float64(_log_wishart_norm(
            self.degrees_of_freedom_, log_det_precisions_chol, n_features))
    else:
        log_wishart = np.sum(_log_wishart_norm(
            self.degrees_of_freedom_, log_det_precisions_chol, n_features))

    if self.weight_concentration_prior_type == 'dirichlet_process':
        log_norm_weight = -np.sum(betaln(self.weight_concentration_[0],
                                         self.weight_concentration_[1]))
    else:
        log_norm_weight = _log_dirichlet_norm(self.weight_concentration_)

    # Entropy of the responsibilities, weighted by the sample counts.
    H_resp = (np.exp(log_resp) * log_resp).sum(1).dot(counts)

    return (-H_resp - log_wishart - log_norm_weight -
            0.5 * n_features * np.sum(np.log(self.mean_precision_)))
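
# A minimal, hypothetical sanity check (not part of the original module) for
# the counts-weighted entropy term above: with all counts equal to 1 it
# reduces to the unweighted sum used by scikit-learn's
# BayesianGaussianMixture, np.sum(np.exp(log_resp) * log_resp).
def _check_weighted_resp_entropy():
    import numpy as np
    rng = np.random.RandomState(0)
    resp = rng.dirichlet(np.ones(3), size=5)   # (n_samples, n_components)
    log_resp = np.log(resp)
    counts = np.ones(5)
    weighted = (np.exp(log_resp) * log_resp).sum(1).dot(counts)
    unweighted = np.sum(np.exp(log_resp) * log_resp)
    assert np.allclose(weighted, unweighted)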
def test_compute_log_det_cholesky():
    n_features = 2
    rand_data = RandomData(np.random.RandomState(0))
    for covar_type in COVARIANCE_TYPE:
        covariance = rand_data.covariances[covar_type]

        if covar_type == 'full':
            predicted_det = np.array([linalg.det(cov) for cov in covariance])
        elif covar_type == 'tied':
            predicted_det = linalg.det(covariance)
        elif covar_type == 'diag':
            predicted_det = np.array([np.prod(cov) for cov in covariance])
        elif covar_type == 'spherical':
            predicted_det = covariance ** n_features

        # We compute the cholesky decomposition of the covariance matrix
        expected_det = _compute_log_det_cholesky(_compute_precision_cholesky(
            covariance, covar_type), covar_type, n_features=n_features)
        assert_array_almost_equal(expected_det, -.5 * np.log(predicted_det))
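
# Worked example of the identity exercised by the test above (a sketch, not
# part of the test suite): for a known 2x2 covariance, the log-determinant of
# the precision Cholesky factor equals -0.5 * log(det(covariance)).
def _example_log_det_cholesky():
    import numpy as np
    from scipy import linalg
    from sklearn.mixture._gaussian_mixture import (
        _compute_log_det_cholesky, _compute_precision_cholesky)

    cov = np.array([[[2.0, 0.3], [0.3, 1.0]]])      # one 'full' component
    prec_chol = _compute_precision_cholesky(cov, 'full')
    log_det = _compute_log_det_cholesky(prec_chol, 'full', n_features=2)
    assert np.allclose(log_det, -0.5 * np.log(linalg.det(cov[0])))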
def __init__(self, estimator, classifier, dtype='float', verbose=0):
    self.dtype = dtype
    self.verbose = verbose

    n_components, n_features = estimator.means_.shape
    covariance_type = estimator.covariance_type
    precisions_chol = estimator.precisions_cholesky_

    # Convert all types to "full" covariance matrix
    # TODO: native support for tied/diag/spherical
    precisions_chol = convert_to_full(estimator.means_, precisions_chol,
                                      covariance_type)
    covariance_type = 'full'

    from sklearn.mixture._gaussian_mixture import _compute_log_det_cholesky
    log_det = _compute_log_det_cholesky(precisions_chol, covariance_type,
                                        n_features)

    self._log_det = log_det
    self._means = estimator.means_.copy()
    self._covariance_type = covariance_type
    self._precisions_col = precisions_chol
    self._log_weights = get_log_weights(estimator)
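
# Hypothetical sketch of what a convert_to_full helper could look like; the
# real helper referenced above is assumed to behave equivalently.  For 'diag'
# and 'spherical' the precision Cholesky factors are per-feature scalars, so
# their full-matrix form is a diagonal matrix per component; 'tied' repeats
# the shared factor for every component.
def convert_to_full_sketch(means, precisions_chol, covariance_type):
    import numpy as np
    n_components, n_features = means.shape
    if covariance_type == 'full':
        return precisions_chol
    if covariance_type == 'tied':
        return np.tile(precisions_chol, (n_components, 1, 1))
    if covariance_type == 'spherical':
        # one scalar per component, broadcast to every feature
        precisions_chol = np.tile(precisions_chol[:, None], (1, n_features))
    # 'diag' (and expanded 'spherical'): per-feature factors on the diagonal
    out = np.zeros((n_components, n_features, n_features))
    for c in range(n_components):
        out[c] = np.diag(precisions_chol[c])
    return out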
def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type):
    """Estimate the log Gaussian probability.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)

    means : array-like of shape (n_components, n_features)

    precisions_chol : array-like
        Cholesky decompositions of the precision matrices.
        'full' : shape of (n_components, n_features, n_features)
        'tied' : shape of (n_features, n_features)
        'diag' : shape of (n_components, n_features)
        'spherical' : shape of (n_components,)

    covariance_type : {'full', 'tied', 'diag', 'spherical'}

    Returns
    -------
    log_prob : array, shape (n_samples, n_components)
    """
    n_samples, n_features = X.shape
    n_components, _ = means.shape
    # det(precision_chol) is half of det(precision)
    log_det = _compute_log_det_cholesky(
        precisions_chol, covariance_type, n_features)

    if covariance_type == 'full':
        log_prob = np.empty((n_samples, n_components))
        for i, x in enumerate(X):
            for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)):
                # Explicit-loop version of the squared Mahalanobis distance,
                # kept as a reference for porting to loop-based code.
                pp = 0.0
                for f in range(x.shape[0]):
                    dot_m = 0.0
                    dot_x = 0.0
                    for p in range(prec_chol.shape[0]):
                        dot_m += mu[p] * prec_chol[p, f]
                        dot_x += x[p] * prec_chol[p, f]
                    y = dot_x - dot_m
                    pp += y * y

                # Vectorized version of the same quantity; p and pp agree
                # up to floating-point error.
                dot_x = np.dot(x, prec_chol)
                dot_m = np.dot(mu, prec_chol)
                y = dot_x - dot_m
                p = np.sum(np.square(y), axis=0)  # sum over features
                log_prob[i, k] = p

    elif covariance_type == 'tied':
        log_prob = np.empty((n_samples, n_components))
        for k, mu in enumerate(means):
            y = np.dot(X, precisions_chol) - np.dot(mu, precisions_chol)
            log_prob[:, k] = np.sum(np.square(y), axis=1)

    elif covariance_type == 'diag':
        precisions = precisions_chol ** 2
        log_prob = (np.sum((means ** 2 * precisions), 1) -
                    2. * np.dot(X, (means * precisions).T) +
                    np.dot(X ** 2, precisions.T))

    elif covariance_type == 'spherical':
        precisions = precisions_chol ** 2
        log_prob = (np.sum(means ** 2, 1) * precisions -
                    2 * np.dot(X, means.T * precisions) +
                    np.outer(row_norms(X, squared=True), precisions))

    return -.5 * (n_features * np.log(2 * np.pi) + log_prob) + log_det
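
# A small sketch (assumed setup, not part of the original module) checking the
# 'full' branch above against scipy: each entry log_prob[i, k] should equal
# the log-density of X[i] under a Gaussian with mean means[k] and covariance
# covs[k].
def _check_full_log_gaussian_prob():
    import numpy as np
    from scipy.stats import multivariate_normal
    from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky

    rng = np.random.RandomState(0)
    X = rng.randn(4, 2)
    means = rng.randn(3, 2)
    covs = np.array([np.diag([1.0 + k, 2.0]) + 0.1 for k in range(3)])  # SPD
    prec_chol = _compute_precision_cholesky(covs, 'full')

    log_prob = _estimate_log_gaussian_prob(X, means, prec_chol, 'full')
    for k in range(3):
        ref = multivariate_normal(mean=means[k], cov=covs[k]).logpdf(X)
        assert np.allclose(log_prob[:, k], ref)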
def convert_sklearn_gaussian_mixture(scope, operator, container):
    """
    Converter for *GaussianMixture*, *BayesianGaussianMixture*.
    Parameters which change the prediction function:

    * *covariance_type*
    """
    X = operator.inputs[0]
    out = operator.outputs
    op = operator.raw_operator
    n_features = X.type.shape[1]
    n_components = op.means_.shape[0]
    opv = container.target_opset

    options = container.get_options(
        op, dict(score_samples=None, combined_reducesum=None))
    add_score = options.get('score_samples', False)
    add_reduced = options.get('combined_reducesum', False)
    if add_score and add_reduced and len(out) != 3:
        raise RuntimeError("3 outputs are expected.")

    if isinstance(op, BayesianGaussianMixture):
        raise NotImplementedError(
            "Converter for BayesianGaussianMixture is not implemented.")

    # All comments come from scikit-learn code and tell
    # which function is being onnxified.
    # def _estimate_weighted_log_prob(self, X):
    #     self._estimate_log_prob(X) + self._estimate_log_weights()

    # self._estimate_log_weights()
    log_weights = np.log(op.weights_).astype(container.dtype)

    # self._estimate_log_prob(X)
    log_det = _compute_log_det_cholesky(
        op.precisions_cholesky_, op.covariance_type, n_features)

    if op.covariance_type == 'full':
        # shape(op.means_) = (n_components, n_features)
        # shape(op.precisions_cholesky_) =
        #     (n_components, n_features, n_features)

        # log_prob = np.empty((n_samples, n_components))
        # for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)):
        #     y = np.dot(X, prec_chol) - np.dot(mu, prec_chol)
        #     log_prob[:, k] = np.sum(np.square(y), axis=1)
        ys = []
        for c in range(n_components):
            prec_chol = op.precisions_cholesky_[c, :, :]
            cst = - np.dot(op.means_[c, :], prec_chol)
            y = OnnxGemm(X, prec_chol.astype(container.dtype),
                         cst.astype(container.dtype),
                         alpha=1., beta=1., op_version=opv)
            y2s = OnnxReduceSum(OnnxMul(y, y, op_version=opv), axes=[1],
                                op_version=opv)
            ys.append(y2s)
        log_prob = OnnxConcat(*ys, axis=1, op_version=opv)

    elif op.covariance_type == 'tied':
        # shape(op.means_) = (n_components, n_features)
        # shape(op.precisions_cholesky_) = (n_features, n_features)

        # log_prob = np.empty((n_samples, n_components))
        # for k, mu in enumerate(means):
        #     y = np.dot(X, precisions_chol) - np.dot(mu, precisions_chol)
        #     log_prob[:, k] = np.sum(np.square(y), axis=1)
        precisions_chol = op.precisions_cholesky_
        ys = []
        for f in range(n_components):
            cst = - np.dot(op.means_[f, :], precisions_chol)
            y = OnnxGemm(X, precisions_chol.astype(container.dtype),
                         cst.astype(container.dtype),
                         alpha=1., beta=1., op_version=opv)
            y2s = OnnxReduceSumSquare(y, axes=[1], op_version=opv)
            ys.append(y2s)
        log_prob = OnnxConcat(*ys, axis=1, op_version=opv)

    elif op.covariance_type == 'diag':
        # shape(op.means_) = (n_components, n_features)
        # shape(op.precisions_cholesky_) = (n_components, n_features)

        # precisions = precisions_chol ** 2
        # log_prob = (np.sum((means ** 2 * precisions), 1) -
        #             2. * np.dot(X, (means * precisions).T) +
        #             np.dot(X ** 2, precisions.T))
        precisions = op.precisions_cholesky_ ** 2
        mp = np.sum((op.means_ ** 2 * precisions), 1)
        zeros = np.zeros((n_components, ))
        xmp = OnnxGemm(
            X, (op.means_ * precisions).T.astype(container.dtype),
            zeros.astype(container.dtype),
            alpha=-2., beta=0., op_version=opv)
        term = OnnxGemm(OnnxMul(X, X, op_version=opv),
                        precisions.T.astype(container.dtype),
                        zeros.astype(container.dtype),
                        alpha=1., beta=0., op_version=opv)
        log_prob = OnnxAdd(
            OnnxAdd(mp.astype(container.dtype), xmp, op_version=opv),
            term, op_version=opv)

    elif op.covariance_type == 'spherical':
        # shape(op.means_) = (n_components, n_features)
        # shape(op.precisions_cholesky_) = (n_components, )

        # precisions = precisions_chol ** 2
        # log_prob = (np.sum(means ** 2, 1) * precisions -
        #             2 * np.dot(X, means.T * precisions) +
        #             np.outer(row_norms(X, squared=True), precisions))
        zeros = np.zeros((n_components, ))
        precisions = op.precisions_cholesky_ ** 2
        normX = OnnxReduceSumSquare(X, axes=[1], op_version=opv)
        outer = OnnxGemm(
            normX, precisions[np.newaxis, :].astype(container.dtype),
            zeros.astype(container.dtype),
            alpha=1., beta=1., op_version=opv)
        xmp = OnnxGemm(
            X, (op.means_.T * precisions).astype(container.dtype),
            zeros.astype(container.dtype),
            alpha=-2., beta=0., op_version=opv)
        mp = np.sum(op.means_ ** 2, 1) * precisions
        log_prob = OnnxAdd(mp.astype(container.dtype),
                           OnnxAdd(xmp, outer, op_version=opv),
                           op_version=opv)
    else:
        raise RuntimeError("Unknown op.covariance_type='{}'. Upgrade "
                           "to a more recent version of sklearn-onnx "
                           "or raise an issue.".format(op.covariance_type))

    # -.5 * (cst + log_prob) + log_det
    cst = np.array([n_features * np.log(2 * np.pi)])
    add = OnnxAdd(cst.astype(container.dtype), log_prob, op_version=opv)
    mul = OnnxMul(add, np.array([-0.5], dtype=container.dtype),
                  op_version=opv)
    if isinstance(log_det, float):
        log_det = np.array([log_det])
    weighted_log_prob = OnnxAdd(
        OnnxAdd(mul, log_det.astype(container.dtype), op_version=opv),
        log_weights, op_version=opv)

    # labels: argmax over components of the weighted log probabilities
    mxlabels = OnnxReduceMax(weighted_log_prob, axes=[1], op_version=opv)
    zeros = OnnxEqual(
        OnnxSub(weighted_log_prob, mxlabels, op_version=opv),
        np.array([0], dtype=container.dtype),
        op_version=opv)
    toint = OnnxCast(zeros, to=onnx_proto.TensorProto.INT64, op_version=opv)
    mulind = OnnxMul(toint, np.arange(n_components).astype(np.int64),
                     op_version=opv)
    labels = OnnxReduceMax(mulind, axes=[1], output_names=out[:1],
                           op_version=opv)

    # def _estimate_log_prob_resp():
    #     np.exp(log_resp)
    #     weighted_log_prob = self._estimate_weighted_log_prob(X)
    #     log_prob_norm = logsumexp(weighted_log_prob, axis=1)
    #     with np.errstate(under='ignore'):
    #         log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis]

    # log_prob_norm = logsumexp(weighted_log_prob, axis=1), written with the
    # max trick instead of OnnxReduceLogSumExp.
    max_weight = OnnxReduceMax(weighted_log_prob, axes=[1], op_version=opv)
    log_prob_norm_demax = OnnxLog(
        OnnxReduceSum(
            OnnxExp(
                OnnxSub(weighted_log_prob, max_weight, op_version=opv),
                op_version=opv),
            axes=[1], op_version=opv),
        op_version=opv)
    log_prob_norm = OnnxAdd(log_prob_norm_demax, max_weight, op_version=opv)
    if add_score:
        score = OnnxReduceMean(log_prob_norm, op_version=opv,
                               output_names=out[2:3])

    log_resp = OnnxSub(weighted_log_prob, log_prob_norm, op_version=opv)

    # probabilities
    probs = OnnxExp(log_resp, output_names=out[1:2], op_version=opv)

    # final
    labels.add_to(scope, container)
    probs.add_to(scope, container)
    if add_score:
        score.add_to(scope, container)
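
# Example usage of the converter above (a sketch; the default input name 'X'
# and the option mechanism keyed by id(model) are assumptions based on common
# skl2onnx usage).
def _example_convert_gaussian_mixture():
    import numpy as np
    from sklearn.mixture import GaussianMixture
    from skl2onnx import to_onnx
    import onnxruntime as rt

    X = np.random.RandomState(0).randn(100, 3).astype(np.float32)
    gm = GaussianMixture(n_components=4, covariance_type='full').fit(X)

    onx = to_onnx(gm, X, options={id(gm): {'score_samples': True}})
    sess = rt.InferenceSession(onx.SerializeToString(),
                               providers=["CPUExecutionProvider"])
    labels, probs, score = sess.run(None, {'X': X})
    return labels, probs, score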
def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type,
                                dtype, op_version, combined_reducesum):
    """
    Converts the same function into ONNX.
    Returns log probabilities.
    """
    n_components = means.shape[0]
    n_features = means.shape[1]
    opv = op_version

    # self._estimate_log_prob(X)
    log_det = _compute_log_det_cholesky(
        precisions_chol, covariance_type, n_features).astype(dtype)

    if covariance_type == 'full':
        # shape(op.means_) = (n_components, n_features)
        # shape(op.precisions_cholesky_) =
        #     (n_components, n_features, n_features)

        # log_prob = np.empty((n_samples, n_components))
        # for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)):
        #     y = np.dot(X, prec_chol) - np.dot(mu, prec_chol)
        #     log_prob[:, k] = np.sum(np.square(y), axis=1)
        ys = []
        for c in range(n_components):
            prec_chol = precisions_chol[c, :, :]
            cst = - np.dot(means[c, :], prec_chol)
            y = OnnxGemm(X, prec_chol.astype(dtype), cst.astype(dtype),
                         alpha=1., beta=1., op_version=opv)
            if combined_reducesum:
                y2s = OnnxReduceSumApi11(OnnxMul(y, y, op_version=opv),
                                         axes=[1], op_version=opv)
            else:
                y2s = OnnxReduceSumSquare(y, axes=[1], op_version=opv)
            ys.append(y2s)
        log_prob = OnnxConcat(*ys, axis=1, op_version=opv)

    elif covariance_type == 'tied':
        # shape(op.means_) = (n_components, n_features)
        # shape(op.precisions_cholesky_) = (n_features, n_features)

        # log_prob = np.empty((n_samples, n_components))
        # for k, mu in enumerate(means):
        #     y = np.dot(X, precisions_chol) - np.dot(mu, precisions_chol)
        #     log_prob[:, k] = np.sum(np.square(y), axis=1)
        ys = []
        for f in range(n_components):
            cst = - np.dot(means[f, :], precisions_chol)
            y = OnnxGemm(X, precisions_chol.astype(dtype), cst.astype(dtype),
                         alpha=1., beta=1., op_version=opv)
            if combined_reducesum:
                y2s = OnnxReduceSumApi11(OnnxMul(y, y, op_version=opv),
                                         axes=[1], op_version=opv)
            else:
                y2s = OnnxReduceSumSquare(y, axes=[1], op_version=opv)
            ys.append(y2s)
        log_prob = OnnxConcat(*ys, axis=1, op_version=opv)

    elif covariance_type == 'diag':
        # shape(op.means_) = (n_components, n_features)
        # shape(op.precisions_cholesky_) = (n_components, n_features)

        # precisions = precisions_chol ** 2
        # log_prob = (np.sum((means ** 2 * precisions), 1) -
        #             2. * np.dot(X, (means * precisions).T) +
        #             np.dot(X ** 2, precisions.T))
        precisions = (precisions_chol ** 2).astype(dtype)
        mp = np.sum((means ** 2 * precisions), 1).astype(dtype)
        zeros = np.zeros((n_components, ), dtype=dtype)
        xmp = OnnxGemm(
            X, (means * precisions).T.astype(dtype), zeros,
            alpha=-2., beta=0., op_version=opv)
        term = OnnxGemm(OnnxMul(X, X, op_version=opv),
                        precisions.T.astype(dtype), zeros,
                        alpha=1., beta=0., op_version=opv)
        log_prob = OnnxAdd(
            OnnxAdd(mp.astype(dtype), xmp, op_version=opv),
            term, op_version=opv)

    elif covariance_type == 'spherical':
        # shape(op.means_) = (n_components, n_features)
        # shape(op.precisions_cholesky_) = (n_components, )

        # precisions = precisions_chol ** 2
        # log_prob = (np.sum(means ** 2, 1) * precisions -
        #             2 * np.dot(X, means.T * precisions) +
        #             np.outer(row_norms(X, squared=True), precisions))
        zeros = np.zeros((n_components, ), dtype=dtype)
        precisions = (precisions_chol ** 2).astype(dtype)
        if combined_reducesum:
            normX = OnnxReduceSumApi11(OnnxMul(X, X, op_version=opv),
                                       axes=[1], op_version=opv)
        else:
            normX = OnnxReduceSumSquare(X, axes=[1], op_version=opv)
        outer = OnnxGemm(
            normX, precisions[np.newaxis, :].astype(dtype),
            zeros.astype(dtype),
            alpha=1., beta=1., op_version=opv)
        xmp = OnnxGemm(
            X, (means.T * precisions).astype(dtype), zeros,
            alpha=-2., beta=0., op_version=opv)
        mp = (np.sum(means ** 2, 1) * precisions).astype(dtype)
        log_prob = OnnxAdd(mp, OnnxAdd(xmp, outer, op_version=opv),
                           op_version=opv)
    else:
        raise RuntimeError("Unknown op.covariance_type='{}'. Upgrade "
                           "to a more recent version of sklearn-onnx "
                           "or raise an issue.".format(covariance_type))

    # -.5 * (cst + log_prob) + log_det
    cst = np.array([n_features * np.log(2 * np.pi)]).astype(dtype)
    add = OnnxAdd(cst, log_prob, op_version=opv)
    mul = OnnxMul(add, np.array([-0.5], dtype=dtype), op_version=opv)
    if isinstance(log_det, (np.float32, np.float64, float)):
        log_det = np.array([log_det], dtype=dtype)
    return OnnxAdd(mul, log_det.astype(dtype), op_version=opv)
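
# Sketch verifying the algebraic expansion used by the 'diag' branches above
# (synthetic data, not part of the converter): for diagonal precisions,
# sum_f precisions[k, f] * (x[f] - means[k, f]) ** 2 expands into the three
# Gemm terms built in ONNX.
def _check_diag_log_prob_expansion():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.randn(5, 3)
    means = rng.randn(2, 3)
    precisions = rng.rand(2, 3) + 0.5   # squared precision Cholesky factors

    direct = np.array(
        [[np.sum(precisions[k] * (x - means[k]) ** 2) for k in range(2)]
         for x in X])
    expanded = (np.sum(means ** 2 * precisions, 1) -
                2. * np.dot(X, (means * precisions).T) +
                np.dot(X ** 2, precisions.T))
    assert np.allclose(direct, expanded)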