def test_pruningpipe():
    print('start', pd.datetime.now())
    n_rows = 500
    n_cluster = 25
    n_simplequestions = 50
    n_pointedquestions = 50
    Xst = getXst(nrows=n_rows)
    ixc = createmultiindex(X=Xst)
    y_true = getytrue()
    y_true = y_true.loc[ixc]
    print(pd.datetime.now(), 'data loaded')
    pipe = PruningPipe(
        connector=DfConnector(
            scorer=Pipeline(steps=[
                ('scores', FeatureUnion(_lr_score_list)),
                ('imputer', SimpleImputer(strategy='constant', fill_value=0))]
            )
        ),
        pruningclf=Explorer(clustermixin=KBinsCluster(n_clusters=n_cluster)),
        sbsmodel=FeatureUnion(transformer_list=_sbs_score_list),
        classifier=LogisticRegressionCV()
    )
    pipe.fit(X=Xst, y=y_true)
    y_pred = pipe.predict(X=Xst)
    precision = precision_score(y_true=y_true, y_pred=y_pred)
    recall = recall_score(y_true=y_true, y_pred=y_pred)
    accuracy = balanced_accuracy_score(y_true=y_true, y_pred=y_pred)
    print('***\nscores:\n')
    print('precision score:{}\n recall score:{}\n balanced accuracy score:{}'.format(
        precision, recall, accuracy))
def test_esconnector():
    print('start', pd.datetime.now())
    n_rows = 500
    n_cluster = 25
    Xst = getXst(nrows=n_rows)
    left = Xst[0]
    esclient = elasticsearch.Elasticsearch()
    scoreplan = {
        'name': {'type': 'FreeText'},
        'street': {'type': 'FreeText'},
        'city': {'type': 'FreeText'},
        'duns': {'type': 'Exact'},
        'postalcode': {'type': 'FreeText'},
        'countrycode': {'type': 'Exact'}
    }
    escon = EsConnector(
        client=esclient,
        scoreplan=scoreplan,
        index="right",
        explain=False,
        size=20
    )
    ixc = createmultiindex(X=Xst)
    y_true = getytrue()
    y_true = y_true.loc[ixc]
    print(pd.datetime.now(), 'data loaded')
    pipe = PruningPipe(
        connector=escon,
        pruningclf=Explorer(clustermixin=KBinsCluster(n_clusters=n_cluster)),
        sbsmodel=FeatureUnion(transformer_list=_sbs_score_list),
        classifier=LogisticRegressionCV()
    )
    pipe.fit(X=left, y=y_true)
    y_pred = pipe.predict(X=left)
    scores = get_commonscores(y_pred=y_pred, y_true=y_true)
    precision = scores['precision']
    recall = scores['recall']
    accuracy = scores['balanced_accuracy']
    print('***\nscores:\n')
    print('precision score:{}\n recall score:{}\n balanced accuracy score:{}'.format(
        precision, recall, accuracy))
def test_explorer():
    print(pd.datetime.now())
    n_rows = 200
    n_cluster = 10
    n_simplequestions = 200
    n_hardquestions = 200
    Xst = getXst(nrows=n_rows)
    y_true = getytrue(Xst=Xst)
    print(pd.datetime.now(), 'data loaded')
    connector = DfConnector(scorer=Pipeline(
        steps=[
            ('scores', FeatureUnion(_score_list)),
            ('imputer', SimpleImputer(strategy='constant', fill_value=0))]))
    explorer = Explorer(clustermixin=KBinsCluster(n_clusters=n_cluster),
                        n_simple=n_simplequestions,
                        n_hard=n_hardquestions)
    connector.fit(X=Xst)
    # Xsm is the transformed output from the connector, i.e. the score matrix
    Xsm = connector.transform(X=Xst)
    print(pd.datetime.now(), 'score ok')
    # ixc is the index corresponding to the score matrix
    ixc = Xsm.index
    ix_simple = explorer.ask_simple(X=pd.DataFrame(data=Xsm, index=ixc),
                                    fit_cluster=True)
    print(pd.datetime.now(), 'length of ix_simple {}'.format(ix_simple.shape[0]))
    sbs_simple = connector.getsbs(X=Xst, on_ix=ix_simple)
    print('***** SBS SIMPLE ******')
    print(sbs_simple.sample(5))
    print('*****')
    y_simple = y_true.loc[ix_simple]
    ix_hard = explorer.ask_hard(X=pd.DataFrame(data=Xsm, index=ixc), y=y_simple)
    print(pd.datetime.now(), 'length of ix_hard {}'.format(ix_hard.shape[0]))
    sbs_hard = connector.getsbs(X=Xst, on_ix=ix_hard)
    print(sbs_hard.sample(5))
    print('*****')
    y_train = y_true.loc[ix_simple.union(ix_hard)]
    print('length of y_train: {}'.format(y_train.shape[0]))
    explorer.fit(X=pd.DataFrame(data=Xsm, index=ixc), y=y_train, fit_cluster=True)
    print('results of pred:\n', pd.Series(explorer.predict(X=Xsm)).value_counts())
    print('****')
def test_pruning():
    print('start', pd.datetime.now())
    n_rows = 200
    n_cluster = 10
    n_simplequestions = 200
    n_hardquestions = 200
    Xst = getXst(nrows=n_rows)
    y_true = getytrue(Xst=Xst)
    print(pd.datetime.now(), 'data loaded')
    connector = DfConnector(scorer=Pipeline(
        steps=[
            ('scores', FeatureUnion(_score_list)),
            ('imputer', SimpleImputer(strategy='constant', fill_value=0))]))
    explorer = Explorer(clustermixin=KBinsCluster(n_clusters=n_cluster),
                        n_simple=n_simplequestions,
                        n_hard=n_hardquestions)
    connector.fit(X=Xst)
    # Xsm is the transformed output from the connector, i.e. the score matrix
    Xsm = connector.transform(X=Xst)
    print(pd.datetime.now(), 'score ok')
    # ixc is the index corresponding to the score matrix
    ixc = Xsm.index
    y_true = y_true.loc[ixc]
    ix_simple = explorer.ask_simple(X=pd.DataFrame(data=Xsm, index=ixc),
                                    fit_cluster=True)
    ix_hard = explorer.ask_hard(X=pd.DataFrame(data=Xsm, index=ixc),
                                y=y_true.loc[ix_simple])
    ix_train = ix_simple.union(ix_hard)
    print('number of training samples:{}'.format(ix_train.shape[0]))
    X_train = pd.DataFrame(data=Xsm, index=ixc).loc[ix_train]
    y_train = y_true.loc[ix_train]
    explorer.fit(X=X_train, y=y_train, fit_cluster=True)
    # Any cluster label > 0 is treated as a predicted match
    y_pruning = explorer.predict(X=Xsm)
    y_pruning = pd.Series(data=y_pruning, name='y_pruning', index=ixc)
    y_pred = (y_pruning > 0).astype(int)
    precision = precision_score(y_true=y_true, y_pred=y_pred)
    recall = recall_score(y_true=y_true, y_pred=y_pred)
    accuracy = balanced_accuracy_score(y_true=y_true, y_pred=y_pred)
    print('***\npruning scores:\n')
    print('precision score:{}\n recall score:{}\n balanced accuracy score:{}'.format(
        precision, recall, accuracy))
Xst = getXst(nrows=n_rows)
ixc = createmultiindex(X=Xst)

# Load the true-label vector corresponding to Xst
y_true = getytrue().loc[ixc]
print(y_true.value_counts())
print(pd.datetime.now(), 'data loaded')

## Explore the data:
connector = DfConnector(
    scorer=Pipeline(steps=[
        ('scores', FeatureUnion(_lr_score_list)),
        ('imputer', SimpleImputer(strategy='constant', fill_value=0))]
    )
)

### Fit the cluster (unsupervised)
explorer = Explorer(clustermixin=KBinsCluster(n_clusters=n_cluster),
                    n_simple=n_simplequestions,
                    n_hard=n_pointedquestions)
Xst = connector.fit_transform(X=Xst)
explorer.fit_cluster(X=Xst)

### Ask simple questions
ix_simple = explorer.ask_simple(X=Xst)
Sbs_simple = connector.getsbs(X=Xst, on_ix=ix_simple)
y_simple = y_true.loc[ix_simple]

### Fit the cluster with supervised data
explorer.fit(X=Xst, y=y_simple, fit_cluster=False)

### Ask hard (pointed) questions
ix_hard = explorer.ask_hard(X=Xst, y=y_simple)
Sbs_hard = connector.getsbs(X=Xst, on_ix=ix_hard)
y_hard = y_true.loc[ix_hard]
# REBUILD Y_true
y_true = pd.read_sql(sql="SELECT * FROM y_true WHERE y_true.y_true = 1",
                     con=engine).set_index(['ix_source', 'ix_target'], drop=True)
y_truetemp = Xst[['ix']]
y_truetemp['y_true'] = 0
y_truetemp.loc[y_true.index.intersection(Xst.index), 'y_true'] = y_true.loc[
    y_true.index.intersection(Xst.index), 'y_true']
y_true = y_truetemp.copy()
del y_truetemp
### y_true now has a multiindex, an ix column, and a y_true column

## Fit the cluster on unsupervised data
exp = Explorer(n_simple=n_questions, n_hard=n_questions)
exp.fit_cluster(X=Xst[['es_score']])
y_cluster = pd.Series(data=exp.pred_cluster(X=Xst), index=Xst.index, name='y_cluster')
X_cluster = pd.DataFrame(y_cluster)
X_cluster['avg_score'] = Xst[['es_score']].mean(axis=1)
X_cluster['y_true'] = y_true['y_true']
X_cluster['ix'] = Xst['ix']
X_cluster.reset_index(inplace=True, drop=False)
X_cluster.set_index('ix', inplace=True)
X_cluster = X_cluster[[
    'ix_source', 'ix_target', 'avg_score', 'y_cluster', 'y_true'
]]
X_cluster.to_sql('cluster_output', con=engine, if_exists='replace')
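# Illustrative follow-up, not part of the original script: once cluster_output has
# been written, reading it back and looking at the share of true matches per cluster
# is one way to see which clusters the pruning step could safely discard.
# Assumption: the same `engine` and the `cluster_output` table created above are
# still available.
cluster_summary = pd.read_sql(sql="SELECT * FROM cluster_output", con=engine)
print(cluster_summary.groupby('y_cluster')['y_true'].agg(['mean', 'count']))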