def _unit_test_params(cls):
    yield {
        "models": [
            linear_model.LogisticRegression(),
            tree.HoeffdingTreeClassifier(),
            naive_bayes.GaussianNB(),
        ]
    }
def test_decision_tree_max_depth():
    model = tree.HoeffdingTreeClassifier()
    max_depths = [1, 2, 3, 4, 5, 6]
    models = utils.expand_param_grid(model, {"max_depth": max_depths})
    for model, max_depth in zip(models, max_depths):
        assert model.max_depth == max_depth
def test_class_splitter(dataset, splitter):
    model = tree.HoeffdingTreeClassifier(
        splitter=splitter,
        grace_period=10,
        leaf_prediction="mc",
        split_confidence=0.1,
    )
    for x, y in dataset:
        model.learn_one(x, y)
    assert model.height > 0
def __init__(self): """Create a persistent model file if there isn't one. If one exists, use it.""" self.file_path = 'models/decision_tree.joblib' self.include_hunger = False self.accuracy_metric_float = 0.0 self.metrics = metrics.Accuracy() if path.exists(self.file_path): self.model = load(self.file_path) else: self.model = tree.HoeffdingTreeClassifier(grace_period=20) self.save_model()
def __init__(self, my_id=1, bootstrap_servers='', list_of_partitions=[],
             request_topic='', inference_topic='', group_id='my_grp'):
    """Set up the model, metric, and Kafka producer/consumer.

    :param my_id: numeric id of this worker
    :param bootstrap_servers: Kafka bootstrap servers string
    :param list_of_partitions: partitions of the request topic to consume from
    :param request_topic: topic to read inference requests from
    :param inference_topic: topic to write inference results to
    :param group_id: Kafka consumer group id
    """
    self.model = tree.HoeffdingTreeClassifier(max_depth=10)
    # compose.Pipeline(
    #     preprocessing.MinMaxScaler(),
    #     anomaly.HalfSpaceTrees(seed=42))
    self.metric = metrics.ROCAUC()  # metrics.Accuracy()
    self.my_id = my_id
    self.t = request_topic
    self.result_t = inference_topic
    self.my_grp_id = group_id
    self.result_t_p = 8
    self.bootstrap_servers = bootstrap_servers
    # self.list_of_partitions = list_of_partitions
    self.tls = []
    for i in list_of_partitions:
        self.tls.append(TopicPartition(self.t, i))
    # self.tls = list_of_partitions
    print(self.tls)
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'sasl.mechanism': 'PLAIN',
        'security.protocol': 'SASL_SSL',
        'ssl.ca.location': '/tmp/cacert.pem',
        'sasl.username': '******',
        'sasl.password': '******',
        # 'key.serializer': StringSerializer('utf_8'),
        # 'value.serializer': StringSerializer('utf_8'),
        'client.id': 'test-sw-1',
    }
    self.producer = Producer(conf)
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'sasl.mechanism': 'PLAIN',
        'security.protocol': 'SASL_SSL',
        'sasl.username': '******',
        'sasl.password': '******',
        'ssl.ca.location': '/tmp/cacert.pem',
        'group.id': group_id,
        'auto.offset.reset': 'latest',
    }
    self.consumer = Consumer(conf)
    self.consumer.assign(self.tls)
def experiment_ht():
    """Runs experiments for Hoeffding Tree"""
    ht_l = []
    train_time_l = []
    test_time_l = []
    v_m_l = []
    s_m_l = []

    ht = tree.HoeffdingTreeClassifier(max_size=1000, grace_period=2)

    for i in range(X_train.shape[0]):
        X_t = X_r[i]
        y_t = y_r[i]

        idx = range(1024)
        X_t = dict(zip(idx, X_t))

        start_time = time.perf_counter()
        ht.learn_one(X_t, y_t)
        end_time = time.perf_counter()
        train_time_l.append(end_time - start_time)

        if i > 0 and (i + 1) % 100 == 0:
            p_t = 0.0
            start_time = time.perf_counter()
            for j in range(X_test.shape[0]):
                y_pred = ht.predict_one(X_test[j])
                if y_pred == y_test[j]:
                    p_t += 1
            ht_l.append(p_t / X_test.shape[0])
            end_time = time.perf_counter()
            test_time_l.append(end_time - start_time)

            # Check memory
            v_m = psutil.virtual_memory()[2]
            v_m_l.append(v_m)
            s_m = psutil.swap_memory()[3]
            s_m_l.append(s_m)

    # Reformat the train times
    new_train_time_l = []
    for i in range(1, X_train.shape[0]):
        train_time_l[i] += train_time_l[i - 1]
        if i > 0 and (i + 1) % 100 == 0:
            new_train_time_l.append(train_time_l[i])
    train_time_l = new_train_time_l

    return ht_l, train_time_l, test_time_l, v_m_l, s_m_l
     EvolutionaryOldestBaggingClassifier(population_size=POPULATION_SIZE,
                                         model=AUTOML_CLASSIFICATION_PIPELINE,
                                         param_grid=CLASSIFICATION_PARAM_GRID,
                                         sampling_rate=SAMPLING_RATE)),
    ('EvoAutoML Bagging Best',
     EvolutionaryBaggingClassifier(population_size=POPULATION_SIZE,
                                   model=AUTOML_CLASSIFICATION_PIPELINE,
                                   param_grid=CLASSIFICATION_PARAM_GRID,
                                   sampling_rate=SAMPLING_RATE)),
    ('ARF', ensemble.AdaptiveRandomForestClassifier()),
    ('Leveraging Bagging', ensemble.LeveragingBaggingClassifier(model=ENSEMBLE_CLASSIFIER())),
    ('Bagging', ensemble.BaggingClassifier(model=ENSEMBLE_CLASSIFIER(), n_models=10)),
    ('SRPC', ensemble.SRPClassifier(n_models=10)),
    ('Hoeffding Tree', tree.HoeffdingTreeClassifier()),
    ('Logistic Regression', linear_model.LogisticRegression()),
    ('HAT', tree.HoeffdingAdaptiveTreeClassifier()),
    ('GaussianNB', naive_bayes.GaussianNB()),
    ('KNN', neighbors.KNNClassifier()),
]

if __name__ == '__main__':
    RESULT_PATH.mkdir(parents=True, exist_ok=True)
    # output = evaluate_ensemble(CLASSIFICATION_TRACKS[1], ENSEMBLE_EVALUATION_MODELS[2])
    pool = Pool(60)  # Create a multiprocessing Pool
    output = pool.starmap(
        evaluate_ensemble,
        list(
"""Anomaly detection example for CPU, RAM, disk usage.""" from random import randint import gradio as gr from river import tree LABELS = {True: 'Abnormal', False: 'Normal'} # Use decision tree induction algorithm suitable for streaming data MODEL = tree.HoeffdingTreeClassifier(max_depth=4) def train_model(iterations: int = 50000) -> None: """Train on the assumption that all >50% and at least one >90% is an anomaly.""" for _ in range(iterations): x = {metric: randint(1, 100) for metric in ['cpu', 'ram', 'disk']} y = LABELS[min(x.values()) > 50 and max(x.values()) > 90] MODEL.learn_one(x, y) def predict_usage(cpu, ram, disk, is_abnormal): """Make the prediction and update with feedback.""" x = {'cpu': cpu, 'ram': ram, 'disk': disk} result = MODEL.predict_proba_one(x), MODEL.debug_one(x) MODEL.learn_one(x, LABELS[is_abnormal], sample_weight=100) return result def launch_interface(): """Launch the Gradio interface.""" cpu = gr.inputs.Slider(1, 100, 1, 30) ram = gr.inputs.Slider(1, 100, 1, 20)
    return synth.LED(seed=42).take(500)


def get_regression_data():
    return synth.Friedman(seed=42).take(500)


@pytest.mark.parametrize(
    "dataset, model",
    [
        (
            get_classification_data(),
            tree.HoeffdingTreeClassifier(
                leaf_prediction="mc",
                max_size=0.025,
                grace_period=50,
                memory_estimate_period=50,
                splitter=tree.splitter.ExhaustiveSplitter(),
            ),
        ),
        (
            get_classification_data(),
            tree.HoeffdingAdaptiveTreeClassifier(
                leaf_prediction="mc",
                max_size=0.025,
                grace_period=50,
                memory_estimate_period=50,
                splitter=tree.splitter.ExhaustiveSplitter(),
            ),
        ),
        (
tracks = [
    ('Random RBF', random_rbf_track),
    ('AGRAWAL', agrawal_track),
    ('Anomaly Sine', anomaly_sine_track),
    ('Concept Drift', concept_drift_track),
    ('Hyperplane', hyperplane_track),
    ('Mixed', mixed_track),
    ('SEA', sea_track),
    ('Sine', sine_track),
    ('STAGGER', stagger_track),
]

estimator1 = compose.Pipeline(
    preprocessing.StandardScaler(),
    # feature_extraction.PolynomialExtender(),
    tree.HoeffdingTreeClassifier()
)

estimator2 = compose.Pipeline(
    pipelinehelper.PipelineHelperTransformer([
        ('scaler', preprocessing.StandardScaler())
    ]),
    # feature_extraction.PolynomialExtender(),
    ('classifier', tree.HoeffdingTreeClassifier())
)

estimator3 = compose.Pipeline(
    ('scaler', preprocessing.StandardScaler()),
    pipelinehelper.PipelineHelperClassifier([
        ('classifier', tree.HoeffdingTreeClassifier())
    ])
)
def experiment(angle, classifiers, n_xor, n_rxor, n_test):
    """Perform XOR RXOR(XNOR) XOR experiment"""
    X_xor, y_xor = generate_gaussian_parity(n_xor)
    X_rxor, y_rxor = generate_gaussian_parity(n_rxor, angle_params=angle)
    X_xor_2, y_xor_2 = generate_gaussian_parity(n_xor)
    test_x_xor, test_y_xor = generate_gaussian_parity(n_test)
    test_x_rxor, test_y_rxor = generate_gaussian_parity(n_test, angle_params=angle)
    X_stream = np.concatenate((X_xor, X_rxor, X_xor_2), axis=0)
    y_stream = np.concatenate((y_xor, y_rxor, y_xor_2), axis=0)

    # Instantiate classifiers
    if classifiers[0] == 1:
        ht = tree.HoeffdingTreeClassifier(grace_period=2, split_confidence=1e-01)
    if classifiers[1] == 1:
        mf = MondrianForestClassifier(n_estimators=10)
    if classifiers[2] == 1:
        sdt = DecisionTreeClassifier()
    if classifiers[3] == 1:
        sdf = StreamDecisionForest()
    if classifiers[4] == 1:
        synf = LifelongClassificationForest(default_n_estimators=10)

    errors = np.zeros((10, int(X_stream.shape[0] / 25)))

    for i in range(int(X_stream.shape[0] / 25)):
        X = X_stream[i * 25:(i + 1) * 25]
        y = y_stream[i * 25:(i + 1) * 25]

        # Hoeffding Tree Classifier
        if classifiers[0] == 1:
            ht_partial_fit(ht, X, y)
            ht_xor_y_hat, ht_rxor_y_hat = ht_predict(ht, test_x_xor, test_x_rxor)
            errors[0, i] = 1 - np.mean(ht_xor_y_hat == test_y_xor)
            errors[1, i] = 1 - np.mean(ht_rxor_y_hat == test_y_rxor)

        # Mondrian Forest Classifier
        if classifiers[1] == 1:
            mf.partial_fit(X, y)
            mf_xor_y_hat = mf.predict(test_x_xor)
            mf_rxor_y_hat = mf.predict(test_x_rxor)
            errors[2, i] = 1 - np.mean(mf_xor_y_hat == test_y_xor)
            errors[3, i] = 1 - np.mean(mf_rxor_y_hat == test_y_rxor)

        # Stream Decision Tree Classifier
        if classifiers[2] == 1:
            sdt.partial_fit(X, y, classes=[0, 1])
            sdt_xor_y_hat = sdt.predict(test_x_xor)
            sdt_rxor_y_hat = sdt.predict(test_x_rxor)
            errors[4, i] = 1 - np.mean(sdt_xor_y_hat == test_y_xor)
            errors[5, i] = 1 - np.mean(sdt_rxor_y_hat == test_y_rxor)

        # Stream Decision Forest Classifier
        if classifiers[3] == 1:
            sdf.partial_fit(X, y, classes=[0, 1])
            sdf_xor_y_hat = sdf.predict(test_x_xor)
            sdf_rxor_y_hat = sdf.predict(test_x_rxor)
            errors[6, i] = 1 - np.mean(sdf_xor_y_hat == test_y_xor)
            errors[7, i] = 1 - np.mean(sdf_rxor_y_hat == test_y_rxor)

        # Synergistic Forest Classifier
        if classifiers[4] == 1:
            if i == 0:
                synf.add_task(X, y, n_estimators=10, task_id=0)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
            elif i < (n_xor / 25):
                synf.update_task(X, y, task_id=0)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
            elif i == (n_xor / 25):
                synf.add_task(X, y, n_estimators=10, task_id=1)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
                synf_rxor_y_hat = synf.predict(test_x_rxor, task_id=1)
            elif i < (n_xor + n_rxor) / 25:
                synf.update_task(X, y, task_id=1)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
                synf_rxor_y_hat = synf.predict(test_x_rxor, task_id=1)
            elif i < (2 * n_xor + n_rxor) / 25:
                synf.update_task(X, y, task_id=0)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
                synf_rxor_y_hat = synf.predict(test_x_rxor, task_id=1)

            if i < (n_xor / 25):
                errors[8, i] = 1 - np.mean(synf_xor_y_hat == test_y_xor)
            if i >= (n_xor / 25):
                errors[8, i] = 1 - np.mean(synf_xor_y_hat == test_y_xor)
                errors[9, i] = 1 - np.mean(synf_rxor_y_hat == test_y_rxor)

    return errors
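# The ht_partial_fit and ht_predict helpers called above are not shown in this
# snippet. A minimal sketch of what they likely do, assuming they adapt river's
# dict-based learn_one/predict_one interface to the numpy arrays used here
# (hypothetical helpers, not the original code):
def ht_partial_fit(model, X, y):
    """Feed a numpy batch to a river Hoeffding tree one sample at a time."""
    for xi, yi in zip(X, y):
        model.learn_one(dict(enumerate(xi)), yi)


def ht_predict(model, test_x_xor, test_x_rxor):
    """Predict both test sets and return the two label arrays."""
    xor_y_hat = np.array([model.predict_one(dict(enumerate(xi))) for xi in test_x_xor])
    rxor_y_hat = np.array([model.predict_one(dict(enumerate(xi))) for xi in test_x_rxor])
    return xor_y_hat, rxor_y_hat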
from river import synth
from river import evaluate
from river import metrics
from river import tree
from river import compose
from river import preprocessing
from river import linear_model
from tqdm import tqdm

scaler = preprocessing.StandardScaler()
log_reg = linear_model.LinearRegression()
hf_tree = tree.HoeffdingTreeClassifier(
    grace_period=100,
    split_confidence=1e-5,
)

model = compose.Pipeline()
model |= hf_tree

for index, raw in tqdm(train_features.iterrows(), total=train_features.shape[0]):
    model.learn_one(raw, train_target[index])

correct_cnt = 0
for index, raw in tqdm(test_features.iterrows(), total=test_features.shape[0]):
    test_pred = model.predict_one(raw)
    if test_pred == test_target[index]:
        correct_cnt += 1
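# A short follow-up sketch (not in the original snippet): turn the correct-prediction
# count into an accuracy figure once the test loop has finished.
accuracy = correct_cnt / test_features.shape[0]
print(f'Hold-out accuracy: {accuracy:.4f}')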