def __init__(self, my_id=1, bootstrap_servers='', list_of_partitions=None,
             request_topic='', inference_topic='', group_id='my_grp'):
    """
    Constructor.

    :param my_id: numeric id of this worker instance
    :param bootstrap_servers: comma-separated Kafka broker addresses
    :param list_of_partitions: partitions of the request topic to consume from
    :param request_topic: topic the raw requests arrive on
    :param inference_topic: topic the inference results are written to
    :param group_id: Kafka consumer group id
    """
    # Online anomaly detection pipeline: min-max scaling followed by
    # Half-Space Trees, tracked with a running ROC AUC.
    self.model = compose.Pipeline(
        preprocessing.MinMaxScaler(),
        anomaly.HalfSpaceTrees(seed=42),
    )
    self.metric = metrics.ROCAUC()

    self.my_id = my_id
    self.t = request_topic
    self.result_t = inference_topic
    self.my_grp_id = group_id
    self.result_t_p = 8
    self.bootstrap_servers = bootstrap_servers

    # Avoid a mutable default argument, then pin the consumer to the
    # partitions this instance is responsible for.
    if list_of_partitions is None:
        list_of_partitions = []
    self.tls = [TopicPartition(self.t, p) for p in list_of_partitions]
    print(self.tls)

    producer_conf = {
        'bootstrap.servers': bootstrap_servers,
        'sasl.mechanism': 'PLAIN',
        'security.protocol': 'SASL_SSL',
        'ssl.ca.location': '/tmp/cacert.pem',
        'sasl.username': '******',
        'sasl.password': '******',
        'client.id': 'test-sw-1',
    }
    self.producer = Producer(producer_conf)

    consumer_conf = {
        'bootstrap.servers': bootstrap_servers,
        'sasl.mechanism': 'PLAIN',
        'security.protocol': 'SASL_SSL',
        'sasl.username': '******',
        'sasl.password': '******',
        'ssl.ca.location': '/tmp/cacert.pem',
        'group.id': group_id,
        'auto.offset.reset': 'latest',
    }
    self.consumer = Consumer(consumer_conf)
    self.consumer.assign(self.tls)
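# Usage sketch (not part of the original class): a consume -> score -> learn
# -> produce loop built on the objects initialised above. The method name
# `run` and the JSON payload layout are assumptions for illustration only.
import json

def run(self):
    while True:
        msg = self.consumer.poll(timeout=1.0)   # block for up to 1 s
        if msg is None or msg.error():
            continue
        x = json.loads(msg.value())             # feature dict from the request topic
        score = self.model.score_one(x)         # Half-Space Trees anomaly score
        self.model.learn_one(x)                 # unsupervised online update
        self.producer.produce(
            self.result_t,
            key=msg.key(),
            value=json.dumps({'worker': self.my_id, 'score': score}),
        )
        self.producer.poll(0)                   # serve delivery callbacks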
"estimator, check", [ pytest.param(estimator, check, id=f"{estimator}:{check.__name__}") for estimator in list(get_all_estimators()) + [ feature_extraction.TFIDF(), linear_model.LogisticRegression(), preprocessing.StandardScaler() | linear_model.LinearRegression(), preprocessing.StandardScaler() | linear_model.PAClassifier(), (preprocessing.StandardScaler() | multiclass.OneVsRestClassifier( linear_model.LogisticRegression())), (preprocessing.StandardScaler() | multiclass.OneVsRestClassifier(linear_model.PAClassifier())), naive_bayes.GaussianNB(), preprocessing.StandardScaler(), cluster.KMeans(n_clusters=5, seed=42), preprocessing.MinMaxScaler(), preprocessing.MinMaxScaler() + preprocessing.StandardScaler(), feature_extraction.PolynomialExtender(), (feature_extraction.PolynomialExtender() | preprocessing.StandardScaler() | linear_model.LinearRegression()), feature_selection.VarianceThreshold(), feature_selection.SelectKBest(similarity=stats.PearsonCorr()), ] for check in utils.estimator_checks.yield_checks(estimator) if check.__name__ not in estimator._unit_test_skips() ], ) def test_check_estimator(estimator, check): check(copy.deepcopy(estimator))
REGRESSION_TRACKS = [
    ('Trump Approval', trump_mse_track),
    # ('Bikes', bikes_mse_track),
    ('Chick Weights', chickweights_mse_track),
    # ('Movielens', movielens_mse_track),
    # ('Restaurants', restaurants_mse_track),
    # ('Taxi', taxis_mse_track),
]

AUTOML_CLASSIFICATION_PIPELINE = compose.Pipeline(
    (
        'Scaler',
        PipelineHelperTransformer([
            ('StandardScaler', preprocessing.StandardScaler()),
            ('MinMaxScaler', preprocessing.MinMaxScaler()),
            ('MaxAbsScaler', preprocessing.MaxAbsScaler()),  # todo create dummy
            # ('RobustScaler', preprocessing.RobustScaler()),
            # ('AdaptiveStandardScaler', preprocessing.AdaptiveStandardScaler()),
            # ('LDA', preprocessing.LDA()),
        ]),
    ),
    # ('FeatureExtractor', PipelineHelperTransformer([
    #     ('PolynomialExtender', feature_extraction.PolynomialExtender()),
    #     ('RBF', feature_extraction.RBFSampler()),
    # ])),
    (
        'Classifier',
        PipelineHelperClassifier([
            ('HT', tree.HoeffdingTreeClassifier()),
            # ('FT', tree.ExtremelyFastDecisionTreeClassifier()),
        ]),
    ),
)
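# Evaluation sketch (assumptions: each *_mse_track entry is a callable that
# returns a river `evaluate.Track`, as in river's benchmark suite, and the
# baseline model plus `n_samples` are illustrative).
from river import linear_model, preprocessing

baseline = preprocessing.StandardScaler() | linear_model.LinearRegression()
for name, make_track in REGRESSION_TRACKS:
    track = make_track(n_samples=1_000)
    for checkpoint in track.run(baseline.clone()):
        print(name, checkpoint)  # running MSE at each checkpoint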
                    (
                        optim.Adam,
                        {
                            "beta_1": [0.1, 0.01, 0.001],
                            "lr": [0.1, 0.01, 0.001, 0.0001],
                        },
                    ),
                ]
            }
        },
        2 + 3 * 4,
    ),
    (
        compose.Pipeline(("Scaler", None), linear_model.LinearRegression()),
        {
            "Scaler": [
                preprocessing.MinMaxScaler(),
                preprocessing.MaxAbsScaler(),
                preprocessing.StandardScaler(),
            ],
            "LinearRegression": {"optimizer": {"lr": [1e-1, 1e-2, 1e-3]}},
        },
        3 * 3,
    ),
],
)
def test_expand_param_grid_count(model, param_grid, count):
    assert len(utils.expand_param_grid(model, param_grid)) == count


def test_decision_tree_max_depth():
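# Sketch (illustrative, separate from the truncated test above):
# expand_param_grid materialises the cross-product of every value list in
# the grid, so two scalers times two learning rates yields four pipelines.
from river import compose, linear_model, preprocessing, utils

base = compose.Pipeline(("Scaler", None), linear_model.LinearRegression())
grid = {
    "Scaler": [preprocessing.MinMaxScaler(), preprocessing.StandardScaler()],
    "LinearRegression": {"optimizer": {"lr": [1e-1, 1e-2]}},
}
models = utils.expand_param_grid(base, grid)
print(len(models))  # 2 * 2 = 4 distinct pipelines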
    return dataset


train = train_tuple[:]
test = test_tuple[:]

# Passive-Aggressive classifier: numeric features are scaled while the raw
# text is vectorised with TF-IDF, and both branches are united before the
# linear model.
PA_model = compose.Pipeline(
    ('features', compose.TransformerUnion(
        ('pipe1', compose.Pipeline(
            ('select_numeric_features', compose.Select('length', 'punct%', 'similarity')),
            ('scale', preprocessing.MinMaxScaler()),
        )),
        ('pipe2', compose.Pipeline(
            ('select_text_features', compose.Select('content')),
            ('tfidf', feature_extraction.TFIDF(on='content')),
        )),
    )),
    ('modeling', linear_model.PAClassifier()),
)

metric = metrics.ROCAUC()

train1 = train[:]
PA_score1 = []
y_pred_l1 = []
y_l1 = []
for x, y in train1:
    x = text_processing(x)
    y_pred = PA_model.predict_one(x)
    y_pred_l1.append(y_pred)
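# Sketch (assumption): the loop above is truncated; a typical river
# test-then-train continuation looks like this, reusing `text_processing`
# and scoring ROC AUC on the positive-class probability.
for x, y in train1:
    x = text_processing(x)
    p_true = PA_model.predict_proba_one(x).get(True, 0.0)  # score before learning
    metric.update(y, p_true)
    PA_model.learn_one(x, y)                               # then train on the sample
print(metric)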