def load_binary_clf_tracks() -> typing.List[Track]:
    """Return the binary-classification benchmark tracks (Phishing, Bananas)."""
    specs = [
        ("Phishing", datasets.Phishing()),
        ("Bananas", datasets.Bananas()),
    ]
    # Every track is scored on both accuracy and F1.
    return [
        Track(
            name=track_name,
            dataset=dataset,
            metric=metrics.Accuracy() + metrics.F1(),
        )
        for track_name, dataset in specs
    ]
# NOTE(review): this signature is truncated in this view — the closing
# parenthesis and the method body lie outside the visible chunk.
def __init__(
    self,
    n_models: int = 10,  # ensemble size
    max_features: typing.Union[bool, str, int] = "sqrt",  # features considered per split
    lambda_value: int = 6,
    # NOTE(review): shared default metric/detector instances below are mutable
    # defaults — confirm downstream code clones them per instance.
    metric: metrics.MultiClassMetric = metrics.Accuracy(),
    disable_weighted_vote=False,
    drift_detector: typing.Union[base.DriftDetector, None] = ADWIN(delta=0.001),
    warning_detector: typing.Union[base.DriftDetector, None] = ADWIN(delta=0.01),
    # Tree parameters
    grace_period: int = 50,  # presumably samples between split attempts — confirm
    max_depth: int = None,
    split_criterion: str = "info_gain",
    split_confidence: float = 0.01,
    tie_threshold: float = 0.05,
    leaf_prediction: str = "nba",
    nb_threshold: int = 0,
    nominal_attributes: list = None,
    splitter: Splitter = None,
    binary_split: bool = False,
    max_size: int = 32,  # presumably a memory budget — units not visible here
    memory_estimate_period: int = 2_000_000,
    stop_mem_management: bool = False,
def __init__(self):
    """Build a scaler → logistic-regression pipeline trained with SGD."""
    sgd = optim.SGD(0.1)
    scaler = preprocessing.StandardScaler()
    classifier = linear_model.LogisticRegression(sgd)
    self.model = compose.Pipeline(scaler, classifier)
    self.metric = metrics.Accuracy()
    self.count = 0
def test_compose():
    """Metrics of compatible kinds compose with `+`; mixing kinds raises."""
    # Compatible pairs build without raising.
    _ = metrics.MAE() + metrics.MSE()
    _ = metrics.Accuracy() + metrics.LogLoss()

    # Mixing regression and classification metrics must be rejected.
    with pytest.raises(ValueError):
        _ = metrics.MSE() + metrics.LogLoss()

    with pytest.raises(ValueError):
        _ = metrics.MSE() + metrics.MAE() + metrics.LogLoss()
def __init__(
    self,
    n_models: int = 10,
    max_features: typing.Union[bool, str, int] = "sqrt",
    lambda_value: int = 6,
    metric: metrics.MultiClassMetric = metrics.Accuracy(),
    disable_weighted_vote=False,
    drift_detector: typing.Union[base.DriftDetector, None] = ADWIN(delta=0.001),
    warning_detector: typing.Union[base.DriftDetector, None] = ADWIN(delta=0.01),
    # Tree parameters
    grace_period: int = 50,
    max_depth: int = None,
    split_criterion: str = "info_gain",
    split_confidence: float = 0.01,
    tie_threshold: float = 0.05,
    leaf_prediction: str = "nba",
    nb_threshold: int = 0,
    nominal_attributes: list = None,
    attr_obs: str = "gaussian",
    attr_obs_params: dict = None,
    max_size: int = 32,
    memory_estimate_period: int = 2000000,
    seed: int = None,
    **kwargs,
):
    """Configure the forest ensemble and record per-member tree settings.

    Ensemble-level options are delegated to the parent class; the remaining
    arguments are stored verbatim and used later when member trees are built.
    NOTE(review): the ADWIN/metric default instances are shared across calls —
    confirm they are cloned per ensemble member downstream.
    """
    super().__init__(
        n_models=n_models,
        max_features=max_features,
        lambda_value=lambda_value,
        metric=metric,
        disable_weighted_vote=disable_weighted_vote,
        drift_detector=drift_detector,
        warning_detector=warning_detector,
        seed=seed,
    )

    self._n_samples_seen = 0
    self._base_member_class = ForestMemberClassifier

    # Tree parameters, stashed one-for-one as attributes (insertion order
    # matches the original assignment order).
    tree_settings = dict(
        grace_period=grace_period,
        max_depth=max_depth,
        split_criterion=split_criterion,
        split_confidence=split_confidence,
        tie_threshold=tie_threshold,
        leaf_prediction=leaf_prediction,
        nb_threshold=nb_threshold,
        nominal_attributes=nominal_attributes,
        attr_obs=attr_obs,
        attr_obs_params=attr_obs_params,
        max_size=max_size,
        memory_estimate_period=memory_estimate_period,
        kwargs=kwargs,
    )
    for attribute, value in tree_settings.items():
        setattr(self, attribute, value)
def __init__(self):
    """Use the persisted decision-tree model if one exists; otherwise build and save a fresh one."""
    self.file_path = 'models/decision_tree.joblib'
    self.include_hunger = False
    self.accuracy_metric_float = 0.0
    self.metrics = metrics.Accuracy()
    if not path.exists(self.file_path):
        # No saved model yet: start a new tree and persist it for next time.
        self.model = tree.HoeffdingTreeClassifier(grace_period=20)
        self.save_model()
    else:
        self.model = load(self.file_path)
def __init__(self, cm: "metrics.ConfusionMatrix" = None):
    """Initialise every report metric on top of one shared confusion matrix.

    All metrics read from the same `cm`, so a single update refreshes the
    whole report.
    """
    if cm is None:
        cm = metrics.ConfusionMatrix()
    self.cm = cm

    self.accuracy = metrics.Accuracy(cm=cm)
    self.kappa = metrics.CohenKappa(cm=cm)
    self.kappa_m = metrics.KappaM(cm=cm)
    self.kappa_t = metrics.KappaT(cm=cm)

    self.recall = metrics.Recall(cm=cm)
    self.micro_recall = metrics.MicroRecall(cm=cm)
    self.macro_recall = metrics.MacroRecall(cm=cm)

    self.precision = metrics.Precision(cm=cm)
    self.micro_precision = metrics.MicroPrecision(cm=cm)
    self.macro_precision = metrics.MacroPrecision(cm=cm)

    self.f1 = metrics.F1(cm=cm)
    self.micro_f1 = metrics.MicroF1(cm=cm)
    self.macro_f1 = metrics.MacroF1(cm=cm)

    self.geometric_mean = metrics.GeometricMean(cm=cm)
def __init__(self, step, name):
    """Set up the scaled logistic-regression pipeline, its optimizer, and the tracked metrics."""
    self.name = name
    self.optimizer = SynchronousSGD(0.01, name, None)

    scaler = preprocessing.StandardScaler()
    learner = linear_model.LogisticRegression(self.optimizer)
    self.model = compose.Pipeline(scaler, learner)

    self.metrics = [
        metrics.Accuracy(),
        metrics.MAE(),
        metrics.RMSE(),
        metrics.Precision(),
        metrics.Recall(),
    ]
    self.count = 0

    # Default to a step of 50 when none is supplied.
    self.step = 50 if step is None else int(step)
def __init__(self):
    """Use the persisted model file if one exists; otherwise wrap a fresh Bernoulli NB and save it."""
    self.file_path = 'models/decision_tree.joblib'
    self.include_hunger = False
    self.accuracy_metric_float = 0.0
    self.accuracy = metrics.Accuracy()
    if not path.exists(self.file_path):
        # Wrap the scikit-learn estimator so it fits river's incremental API,
        # then persist it for the next run.
        self.model = compat.convert_sklearn_to_river(
            estimator=BNB(binarize=.1),
            classes=[0, 1],
        )
        self.save_model()
    else:
        self.model = load(self.file_path)
from river import compose
from river import datasets
from river import evaluate
from river import metrics
from river import optim
from river import preprocessing
from river import tree
# BUG FIX: `synth` was referenced below but never imported, which raises a
# NameError at runtime.
# TODO(review): confirm this is where the installed river build exposes synth.
from river.datasets import synth

# Stream that alternates between two RandomRBF generators so the concept
# the model sees is influenced by its own predictions.
X_y = synth.PredictionInfluenceStream(
    stream=[
        synth.RandomRBF(seed_model=42, seed_sample=42, n_classes=2, n_features=4, n_centroids=20),
        synth.RandomRBF(seed_model=41, seed_sample=49, n_classes=2, n_features=4, n_centroids=20),
    ]
)

# Scale features, then learn with an adaptive Hoeffding tree.
model = preprocessing.StandardScaler()
model |= tree.HoeffdingAdaptiveTreeClassifier(
    grace_period=100,
    split_confidence=1e-5,
    leaf_prediction='nb',
    nb_threshold=10,
    seed=0,
)

metric = metrics.Accuracy()

# Progressive evaluation on the influence-aware stream.
evaluate.evaluate_influential(X_y, model, metric, print_every=100)
# NOTE(review): the lines down to the `partial` definition are the tail of a
# test-case generator whose `def` line is outside this view; `n`, `y_true`,
# `sample_weights`, and `metric` are bound earlier in that function.
    # One Dirichlet-distributed probability vector per sample (3 classes),
    # i.e. each prediction sums to 1.
    y_pred = [
        np.random.dirichlet(np.ones(3)).tolist()
        for _ in range(n)
    ]
    yield y_true, y_pred, sample_weights

    # Regression metrics additionally get a case built from plain random floats.
    if isinstance(metric, base.RegressionMetric):
        yield (
            [random.random() for _ in range(n)],
            [random.random() for _ in range(n)],
            sample_weights,
        )


def partial(f, **kwargs):
    """Like `functools.partial`, but copy *f*'s metadata onto the wrapper.

    Deliberately shadows the stdlib name in this module so partially-applied
    scikit-learn scorers keep the original function's `__name__` etc.
    """
    return functools.update_wrapper(functools.partial(f, **kwargs), f)


# (river metric, equivalent scikit-learn scorer) pairs.
# NOTE(review): the list continues past the end of this view.
TEST_CASES = [
    (metrics.Accuracy(), sk_metrics.accuracy_score),
    (metrics.Precision(), sk_metrics.precision_score),
    (metrics.MacroPrecision(), partial(sk_metrics.precision_score, average='macro')),
    (metrics.MicroPrecision(), partial(sk_metrics.precision_score, average='micro')),
    (metrics.WeightedPrecision(), partial(sk_metrics.precision_score, average='weighted')),
    (metrics.Recall(), sk_metrics.recall_score),
    (metrics.MacroRecall(), partial(sk_metrics.recall_score, average='macro')),
    (metrics.MicroRecall(), partial(sk_metrics.recall_score, average='micro')),
    (metrics.WeightedRecall(), partial(sk_metrics.recall_score, average='weighted')),
    (metrics.FBeta(beta=.5), partial(sk_metrics.fbeta_score, beta=.5)),
    (metrics.MacroFBeta(beta=.5), partial(sk_metrics.fbeta_score, beta=.5, average='macro')),
# NOTE(review): the lines above the first `def` are the tail of a benchmark
# generator whose enclosing function is outside this view; `results`, `model`,
# `population_metrics`, and `checkpoints` are bound earlier in it. The
# indentation below is reconstructed — confirm against the full file.
    # Report identical memory usage for every individual in the population.
    results["Memory"] = [model._memory_usage] * model.population_size
    results["Name"] = []
    results["Model Performance"] = []
    for idx, me in enumerate(population_metrics):
        results["Name"].append(f'Individual {idx}')
        results["Model Performance"].append(me.get())
        #results["Model Performance"].append(model.population_metrics[idx].get())
    yield results
    # Advance to the next checkpoint; None once they are exhausted.
    next_checkpoint = next(checkpoints, None)


def evo_rbf_accuracy_50_001_track(n_samples=10_000, seed=42):
    """EvoTrack on a drifting RandomRBF stream: 50 centroids, change speed 0.001."""
    n_centroids = 50
    change_speed = .001
    dataset = synth.RandomRBFDrift(seed_model=7, seed_sample=seed, n_classes=5, n_features=50,
                                   n_centroids=n_centroids, change_speed=change_speed).take(n_samples)
    track = EvoTrack("RBF(50,0.001)", dataset, metrics.Accuracy(), n_samples)
    return track


def evo_rbf_accuracy_10_0001_track(n_samples=10_000, seed=42):
    """EvoTrack on a drifting RandomRBF stream: 10 centroids, change speed 0.0001."""
    n_centroids = 10
    change_speed = .0001
    dataset = synth.RandomRBFDrift(seed_model=7, seed_sample=seed, n_classes=5, n_features=50,
                                   n_centroids=n_centroids, change_speed=change_speed).take(n_samples)
    track = EvoTrack("RBF(10,0.0001)", dataset, metrics.Accuracy(), n_samples)
    return track


def evo_rbf_accuracy_10_001_track(n_samples=10_000, seed=42):
    """EvoTrack on a drifting RandomRBF stream: 10 centroids, change speed 0.001."""
    n_centroids = 10
    change_speed = .001
    dataset = synth.RandomRBFDrift(seed_model=7, seed_sample=seed, n_classes=5, n_features=50,
                                   n_centroids=n_centroids, change_speed=change_speed).take(n_samples)
    track = EvoTrack("RBF(10,0.001)", dataset, metrics.Accuracy(), n_samples)
    return track
def __init__(self):
    """Initialise tracking state: hunger flag plus a running accuracy metric and its float cache."""
    self.include_hunger = True
    self.accuracy_metric_float = 0.0
    self.accuracy = metrics.Accuracy()