def __init__(self,
             my_id=1,
             bootstrap_servers='',
             list_of_partitions=None,
             request_topic='',
             inference_topic='',
             group_id='my_grp'):
        """Set up the model, the metric and the producer/consumer clients.

        :param my_id: Identifier stored on the instance as ``my_id``.
        :param bootstrap_servers: Value used for the ``bootstrap.servers``
            setting of both clients.
        :param list_of_partitions: Iterable of partition ids of
            ``request_topic`` to assign to the consumer. ``None`` (the
            default) means no partitions.
        :param request_topic: Topic whose partitions the consumer is
            assigned to; stored as ``t``.
        :param inference_topic: Stored as ``result_t`` (presumably the topic
            results are published to -- confirm against the callers).
        :param group_id: Value used for the consumer ``group.id`` setting;
            also stored as ``my_grp_id``.
        """
        # Scale the features, then score them with half-space trees; the
        # fixed seed keeps the tree construction reproducible.
        self.model = compose.Pipeline(
            preprocessing.MinMaxScaler(),
            anomaly.HalfSpaceTrees(seed=42),
        )
        self.metric = metrics.ROCAUC()

        self.my_id = my_id
        self.t = request_topic
        self.result_t = inference_topic
        self.my_grp_id = group_id
        # NOTE(review): hard-coded value; presumably the partition (or the
        # partition count) of the result topic -- confirm.
        self.result_t_p = 8
        self.bootstrap_servers = bootstrap_servers

        # ``None`` replaces the former mutable ``[]`` default; both mean
        # "no partitions".
        partitions = [] if list_of_partitions is None else list_of_partitions
        self.tls = [TopicPartition(self.t, p) for p in partitions]
        print(self.tls)

        # Connection and security settings shared by producer and consumer.
        # NOTE(review): the credentials are hard-coded (masked in this
        # source); consider loading them from the environment or a secret
        # store instead.
        common_conf = {
            'bootstrap.servers': bootstrap_servers,
            'sasl.mechanism': 'PLAIN',
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/tmp/cacert.pem',
            'sasl.username': '******',
            'sasl.password': '******',
        }

        self.producer = Producer({**common_conf, 'client.id': 'test-sw-1'})

        self.consumer = Consumer({
            **common_conf,
            'group.id': group_id,
            'auto.offset.reset': 'latest',
        })
        # Explicit assignment of the requested partitions of the request
        # topic to this consumer.
        self.consumer.assign(self.tls)
Пример #2
0
    "estimator, check",
    [
        pytest.param(estimator, check, id=f"{estimator}:{check.__name__}")
        for estimator in list(get_all_estimators()) + [
            feature_extraction.TFIDF(),
            linear_model.LogisticRegression(),
            preprocessing.StandardScaler() | linear_model.LinearRegression(),
            preprocessing.StandardScaler() | linear_model.PAClassifier(),
            (preprocessing.StandardScaler()
             | multiclass.OneVsRestClassifier(
                 linear_model.LogisticRegression())),
            (preprocessing.StandardScaler()
             | multiclass.OneVsRestClassifier(linear_model.PAClassifier())),
            naive_bayes.GaussianNB(),
            preprocessing.StandardScaler(),
            cluster.KMeans(n_clusters=5, seed=42),
            preprocessing.MinMaxScaler(),
            preprocessing.MinMaxScaler() + preprocessing.StandardScaler(),
            feature_extraction.PolynomialExtender(),
            (feature_extraction.PolynomialExtender()
             | preprocessing.StandardScaler()
             | linear_model.LinearRegression()),
            feature_selection.VarianceThreshold(),
            feature_selection.SelectKBest(similarity=stats.PearsonCorr()),
        ] for check in utils.estimator_checks.yield_checks(estimator)
        if check.__name__ not in estimator._unit_test_skips()
    ],
)
def test_check_estimator(estimator, check):
    """Run a single check against a deep copy of the given estimator.

    The check receives its own copy, so whatever it does to the estimator
    is not visible on the object that was passed in.
    """
    fresh_estimator = copy.deepcopy(estimator)
    check(fresh_estimator)
Пример #3
0
# Regression tracks as (display name, track) pairs.
# Currently disabled (re-add an entry below to enable it):
#   ('Bikes', bikes_mse_track)
#   ('Movielens', movielens_mse_track)
#   ('Restaurants', restaurants_mse_track)
#   ('Taxi', taxis_mse_track)
REGRESSION_TRACKS = [
    ('Trump Approval', trump_mse_track),
    ('Chick Weights', chickweights_mse_track),
]

AUTOML_CLASSIFICATION_PIPELINE = compose.Pipeline(
    (
        'Scaler',
        PipelineHelperTransformer([
            ('StandardScaler', preprocessing.StandardScaler()),
            ('MinMaxScaler', preprocessing.MinMaxScaler()),
            ('MinAbsScaler', preprocessing.MaxAbsScaler()),
            # todo create dummy
            # ('RobustScaler', preprocessing.RobustScaler()),
            # ('AdaptiveStandardScaler', preprocessing.AdaptiveStandardScaler()),
            # ('LDA', preprocessing.LDA()),
        ])),
    # ('FeatureExtractor', PipelineHelperTransformer([
    #    ('PolynomialExtender', feature_extraction.PolynomialExtender()),
    # ('RBF', feature_extraction.RBFSampler()),
    # ])),
    (
        'Classifier',
        PipelineHelperClassifier([
            ('HT', tree.HoeffdingTreeClassifier()),
            # ('FT', tree.ExtremelyFastDecisionTreeClassifier()),
Пример #4
0
                            optim.Adam,
                            {
                                "beta_1": [0.1, 0.01, 0.001],
                                "lr": [0.1, 0.01, 0.001, 0.0001],
                            },
                        ),
                    ]
                }
            },
            2 + 3 * 4,
        ),
        (
            compose.Pipeline(("Scaler", None), linear_model.LinearRegression()),
            {
                "Scaler": [
                    preprocessing.MinMaxScaler(),
                    preprocessing.MaxAbsScaler(),
                    preprocessing.StandardScaler(),
                ],
                "LinearRegression": {"optimizer": {"lr": [1e-1, 1e-2, 1e-3]}},
            },
            3 * 3,
        ),
    ],
)
def test_expand_param_grid_count(model, param_grid, count):
    """Check that expanding ``param_grid`` for ``model`` yields ``count`` items."""
    expanded = utils.expand_param_grid(model, param_grid)
    assert len(expanded) == count


def test_decision_tree_max_depth():
Пример #5
0
    return dataset


# Full slices of the prepared train/test data (a copy when the source is a
# list).
train = train_tuple[:]
test = test_tuple[:]

# Passive-Aggressive classifier pipeline:
#   * 'pipe1' selects 'length', 'punct%' and 'similarity' and min-max
#     scales them,
#   * 'pipe2' selects 'content' and turns it into TF-IDF features,
#   * both branches are combined by a TransformerUnion and fed to the
#     classifier.
PA_model = compose.Pipeline(
    (
        'features',
        compose.TransformerUnion(
            (
                'pipe1',
                compose.Pipeline(
                    (
                        'select_numeric_features',
                        compose.Select('length', 'punct%', 'similarity'),
                    ),
                    ('scale', preprocessing.MinMaxScaler()),
                ),
            ),
            (
                'pipe2',
                compose.Pipeline(
                    ('select_text_features', compose.Select('content')),
                    ('tfidf', feature_extraction.TFIDF(on='content')),
                ),
            ),
        ),
    ),
    ('modeling', linear_model.PAClassifier()),
)

# Metric object and the accumulators used by the loop over train1 below.
metric = metrics.ROCAUC()
train1 = train[:]
PA_score1, y_pred_l1, y_l1 = [], [], []
for x, y in train1:
    x = text_processing(x)
    y_pred = PA_model.predict_one(x)
    y_pred_l1.append(y_pred)