def objective(X, y, clf, meantype=meantype, block_size=block_size, percentage=0.01):
    """Return the aggregated (and optionally penalised) loss of ``clf`` on ``(X, y)``.

    Per-sample losses are derived from ``clf.decision_function(X)``:

    * if the enclosing-scope flag ``binary`` is truthy, ``vec_logit`` is
      applied to the element-wise product of the scores and ``y``
      (presumably ``y`` holds -1/+1 labels -- confirm against the caller);
    * otherwise the loss of sample ``i`` is
      ``logsumexp(scores[i]) - scores[i, y[i]]``, i.e. the softmax
      cross-entropy with ``y`` used as integer column indices.

    The losses are reduced to a single value by the estimator selected with
    ``meantype``. Unless the enclosing-scope ``penalty`` equals ``"none"``,
    ``lamda * penalties[penalty](clf.coef_)`` is added to the result.

    Args:
        X: Samples passed to ``clf.decision_function``.
        y: Targets matching the rows of ``X``.
        clf: Fitted estimator exposing ``decision_function`` and ``coef_``.
        meantype: One of ``"ordinary"``, ``"ch"``, ``"mom"`` or ``"tmean"``.
        block_size: Multiplied by the number of losses and truncated to an
            int before being handed to ``median_of_means`` (``"mom"`` only).
        percentage: Forwarded to ``fast_trimmed_mean`` (``"tmean"`` only).

    Returns:
        The reduced loss, plus the penalty term when one is configured.

    Raises:
        ValueError: If ``meantype`` is not one of the supported names.
    """

    def reduce_losses(values):
        # Map the per-sample values to one number with the chosen estimator.
        if meantype == "ordinary":
            return values.mean()
        if meantype == "ch":
            return holland_catoni_estimator(values)
        if meantype == "mom":
            return median_of_means(values, int(block_size * len(values)))
        if meantype == "tmean":
            return fast_trimmed_mean(values, len(values), percentage)
        raise ValueError("unknown mean")

    if binary:
        margins = clf.decision_function(X) * y
        losses = vec_logit(margins)
    else:
        logits = clf.decision_function(X)
        row_ids = np.arange(X.shape[0])
        # Score assigned to each sample's own label, picked row by row.
        label_scores = logits[row_ids, y]
        losses = -label_scores + logsumexp(logits, axis=1)

    total = reduce_losses(losses)

    if penalty != "none":
        total += lamda * penalties[penalty](clf.coef_)
    return total
def accuracy(X, y, clf, meantype=meantype, block_size=block_size, percentage=0.01):
    """Return the aggregated 0/1 correctness of ``clf`` on ``(X, y)``.

    A per-sample indicator is built first:

    * if the enclosing-scope flag ``binary`` is truthy, a sample counts as
      correct when ``y * clf.decision_function(X)`` is strictly positive;
    * otherwise a sample counts as correct when ``clf.predict(X)`` equals
      ``y``.

    The boolean indicators are converted to floats (via int) and reduced to a
    single value by the estimator selected with ``meantype``.

    Args:
        X: Samples passed to ``clf.decision_function`` / ``clf.predict``.
        y: Targets matching the rows of ``X``.
        clf: Fitted estimator exposing ``decision_function`` and ``predict``.
        meantype: One of ``"ordinary"``, ``"ch"``, ``"mom"`` or ``"tmean"``.
        block_size: Multiplied by the number of samples and truncated to an
            int before being handed to ``median_of_means`` (``"mom"`` only).
        percentage: Forwarded to ``fast_trimmed_mean`` (``"tmean"`` only).

    Returns:
        The reduced correctness indicator.

    Raises:
        ValueError: If ``meantype`` is not one of the supported names.
    """
    if binary:
        raw_scores = clf.decision_function(X)
        hits = (y * raw_scores) > 0
    else:
        predicted = clf.predict(X)
        hits = y == predicted

    # bool -> int -> float, so the estimators receive a float array of 0./1.
    correct = hits.astype(int).astype(float)

    if meantype == "ordinary":
        return correct.mean()
    if meantype == "ch":
        return holland_catoni_estimator(correct)
    if meantype == "mom":
        return median_of_means(correct, int(block_size * len(correct)))
    if meantype == "tmean":
        return fast_trimmed_mean(correct, len(correct), percentage=percentage)
    raise ValueError("unknown mean")
def risk(X, y, clf, meantype=meantype, block_size=block_size, percentage=percentage):
    """Return the aggregated (and optionally penalised) regression risk of ``clf``.

    For the ``"leastsquares"`` loss (read from the enclosing-scope ``loss``)
    the per-sample objective is ``0.5 * (clf.decision_function(X) - y) ** 2``.
    The objectives are reduced to a single value by the estimator selected
    with ``meantype``; when the enclosing-scope ``penalty`` is truthy,
    ``lamda * penalties[penalty](clf.coef_)`` is added to the result.

    Args:
        X: Samples passed to ``clf.decision_function``.
        y: Regression targets matching the rows of ``X``.
        clf: Fitted estimator exposing ``decision_function`` and ``coef_``.
        meantype: One of ``"ordinary"``, ``"catoni"``, ``"mom"`` or
            ``"tmean"``.
        block_size: Multiplied by the number of objectives and truncated to
            an int before being handed to ``median_of_means`` (``"mom"``
            only).
        percentage: Forwarded to ``fast_trimmed_mean`` (``"tmean"`` only).

    Returns:
        The reduced objective, plus the penalty term when one is configured.

    Raises:
        ValueError: If ``loss`` is not ``"leastsquares"`` or ``meantype`` is
            not one of the supported names.
    """
    if loss == "leastsquares":
        objectives = 0.5 * ((clf.decision_function(X) - y) ** 2)
    else:
        # The exception was previously built but never raised, so an
        # unsupported loss surfaced later as an unrelated error about
        # ``objectives`` being undefined. Fail here with the real cause.
        raise ValueError("unimplemented loss %s" % loss)

    # NOTE(review): this function spells the Holland-Catoni option "catoni"
    # while `objective` and `accuracy` use "ch" -- confirm which the callers
    # pass.
    if meantype == "ordinary":
        obj = objectives.mean()
    elif meantype == "catoni":
        obj = holland_catoni_estimator(objectives)
    elif meantype == "mom":
        obj = median_of_means(objectives, int(block_size * len(objectives)))
    elif meantype == "tmean":
        obj = fast_trimmed_mean(objectives, len(objectives), percentage)
    else:
        raise ValueError("unknown mean")

    # NOTE(review): truthiness test, so a penalty of "none" (the sentinel
    # `objective` compares against) would still be looked up in `penalties`
    # -- confirm the intended "no penalty" value for this code path.
    if penalty:
        obj += lamda * penalties[penalty](clf.coef_)
    return obj