# Online logistic-regression pipeline: two feature branches are computed
# from the same input record and merged by a TransformerUnion before the
# classifier sees them.

# Branch 1: discard the raw-text / metadata columns, then standard-scale
# whatever columns are left.
_numeric_branch = compose.Pipeline(
    (
        'drop_non_features',
        compose.Discard(
            'body', 'date', 'subject', 'text', 'title', 'title_clean',
        ),
    ),
    ('scale', preprocessing.StandardScaler()),
)

# Branch 2: discard every listed column (note 'title_clean' is NOT in the
# list), then build TF-IDF features from the 'title_clean' field.
_title_branch = compose.Pipeline(
    (
        'drop_non_featuress',
        compose.Discard(
            'body', 'body_len', 'body_num', 'date', 'punct%',
            'subject', 'text', 'title', 'title_len', 'title_num',
        ),
    ),
    ('tfidf', feature_extraction.TFIDF(on='title_clean')),
)

_feature_union = compose.TransformerUnion(
    ('pipe1', _numeric_branch),
    ('pipe2', _title_branch),
)

Logistic_model = compose.Pipeline(
    ('features', _feature_union),
    ('modeling', linear_model.LogisticRegression()),
)

# Scratch calls kept (disabled) for reference:
#metric = metrics.Accuracy()
#evaluate.progressive_val_score(dataset_tuple_a, model, metric)
#model.predict_proba_one(z)
#model.predict_one(z)
#print(Logistic_model.draw())

# Metric object used for evaluation, and a slice copy of the training data.
metric = metrics.ROCAUC()
train1 = train[:]
submodule = f"river.{submodule}" for _, obj in inspect.getmembers(importlib.import_module(submodule), is_estimator): if issubclass(obj, ignored): continue params = obj._unit_test_params() yield obj(**params) @pytest.mark.parametrize( "estimator, check", [ pytest.param(estimator, check, id=f"{estimator}:{check.__name__}") for estimator in list(get_all_estimators()) + [ feature_extraction.TFIDF(), linear_model.LogisticRegression(), preprocessing.StandardScaler() | linear_model.LinearRegression(), preprocessing.StandardScaler() | linear_model.PAClassifier(), (preprocessing.StandardScaler() | multiclass.OneVsRestClassifier( linear_model.LogisticRegression())), (preprocessing.StandardScaler() | multiclass.OneVsRestClassifier(linear_model.PAClassifier())), naive_bayes.GaussianNB(), preprocessing.StandardScaler(), cluster.KMeans(n_clusters=5, seed=42), preprocessing.MinMaxScaler(), preprocessing.MinMaxScaler() + preprocessing.StandardScaler(), feature_extraction.PolynomialExtender(), (feature_extraction.PolynomialExtender()
# Work on slice copies of the prepared (features, label) sequences.
train = train_tuple[:]
test = test_tuple[:]

# Passive Aggressive Classifier
# Two feature branches are merged by a TransformerUnion:
#   pipe1 - selects the numeric columns and min-max scales them;
#   pipe2 - selects the 'content' column and builds TF-IDF features from it.
PA_model = compose.Pipeline(
    (
        'features',
        compose.TransformerUnion(
            (
                'pipe1',
                compose.Pipeline(
                    (
                        'select_numeric_features',
                        compose.Select('length', 'punct%', 'similarity'),
                    ),
                    ('scale', preprocessing.MinMaxScaler()),
                ),
            ),
            (
                'pipe2',
                compose.Pipeline(
                    ('select_text_features', compose.Select('content')),
                    ('tfidf', feature_extraction.TFIDF(on='content')),
                ),
            ),
        ),
    ),
    ('modeling', linear_model.PAClassifier()),
)

metric = metrics.ROCAUC()
train1 = train[:]

PA_score1 = []   # running metric value after each training example
y_pred_l1 = []   # prediction made for each example *before* learning from it
y_l1 = []        # true label of each example

# Predict-then-learn loop: every example is first used to score the current
# model and only afterwards used to update it.
# NOTE(review): ROCAUC is fed the hard label from predict_one here; confirm
# whether predict_proba_one scores were intended instead.
for x, y in train1:
    x = text_processing(x)
    y_pred = PA_model.predict_one(x)
    y_pred_l1.append(y_pred)
    y_l1.append(y)
    PA_model.learn_one(x, y)
    metric.update(y, y_pred)
    # Read the value through the metric API rather than parsing
    # str(metric): the display string is rounded and, depending on the
    # river version, may be percent-formatted, which float() cannot parse.
    PA_score1.append(metric.get())