Python KMeansPlusPlus примеры использования

Язык программирования: Python

Пространство имен/Пакет: nimbusml.cluster

Класс/Тип: KMeansPlusPlus

Примеров на hotexamples.com: 9

Python KMeansPlusPlus — найдено 9 примеров. Это лучшие примеры кода на Python для nimbusml.cluster.KMeansPlusPlus, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

KMeansPlusPlus(12)

predict(1)

Основные методы

KMeansPlusPlus (12)

predict (1)

Пример #1

Показать файл

 def test_defaults(self):
     """Run check_cv on a OneHotVectorizer + KMeansPlusPlus pipeline.

     The data comes from ``infert_file``, read through a schema whose
     numeric dtype is forced to float32.
     """
     infert_schema = DataSchema.read_schema(
         infert_file, numeric_dtype=np.float32)
     stream = FileDataStream.read_csv(infert_file, schema=infert_schema)

     # The vectorizer is configured with {'edu': 'education'}; 'edu' is
     # then listed among the clustering features below.
     encoder = OneHotVectorizer(columns={'edu': 'education'})
     clusterer = KMeansPlusPlus(
         n_clusters=5,
         feature=['edu', 'age', 'parity', 'spontaneous', 'stratum'])

     check_cv([encoder, clusterer], stream)

Пример #2

Показать файл

Файл: test_exports.py Проект: zyw400/NimbusML-1

 def test_get_fit_info_clustering(self):
     """Fit a 3-cluster KMeansPlusPlus pipeline and inspect get_fit_info.

     Asserts that the last entry of the first element returned by
     ``get_fit_info`` lists the predicted label plus one score column per
     cluster, and that ``predict`` yields one row per training row.
     """
     # Every training point has identical x, y and z coordinates.
     coords = [0, 1, 2, 10, 11, 12, -10, -11, -12]
     features = pandas.DataFrame(
         data={axis: list(coords) for axis in ('x', 'y', 'z')})
     cluster_ids = pandas.DataFrame(
         data={'clusterid': [0, 0, 0, 1, 1, 1, 2, 2, 2]})

     model = Pipeline([KMeansPlusPlus(n_clusters=3)])
     model.fit(features, cluster_ids, verbose=0)
     predicted = model.predict(features)

     fit_info = model.get_fit_info(features, cluster_ids)
     final_outputs = fit_info[0][-1]['outputs']

     assert final_outputs == [
         'PredictedLabel', 'Score.0', 'Score.1', 'Score.2']
     assert len(predicted) == 9

Пример #3

Показать файл

Файл: test_score_method.py Проект: zyw400/NimbusML-1

    def test_score_clusterer(self):
        """Check ``Pipeline.score`` for a 2-cluster KMeansPlusPlus on iris.

        The 'Species' column is dropped and 'Label' is binarized (1 stays 1,
        everything else becomes 0) before the train/test split. The score
        must match the pinned value to 5 decimals.
        """
        # Pinned reference value; the failure message calls it "NMI loss".
        expected_score = 0.36840763005544264

        # Seed before the split so the random partition is reproducible.
        np.random.seed(0)

        iris = get_dataset("iris").as_df()
        iris.drop(['Species'], inplace=True, axis=1)
        iris.Label = [int(value == 1) for value in iris.Label]

        feature_columns = iris.loc[:, iris.columns != 'Label']
        X_train, X_test, y_train, y_test = train_test_split(
            feature_columns, iris['Label'])

        clusterer = KMeansPlusPlus(n_clusters=2,
                                   init_algorithm="Random",
                                   train_threads=1)
        pipeline = Pipeline([clusterer])
        pipeline.fit(X_train, y_train.to_frame())

        metrics = pipeline.score(X_test, y_test)
        print(metrics)
        assert_almost_equal(
            metrics,
            expected_score,
            decimal=5,
            err_msg="NMI loss should be %s" % expected_score)

Пример #4

Показать файл

Файл: test_fit_graph.py Проект: xadupre/NimbusML

    def test_non_label_based_predictor_does_not_have_label_column_automatically_removed(
            self):
        """Dry-run a KMeansPlusPlus pipeline and inspect the generated graph.

        Asserts that the first node is the feature combiner, that it takes
        every input column (including 'Label') as a feature, and that the
        second node is the KMeansPlusPlus trainer reading from 'Features'.
        """
        all_columns = ['c1', 'c2', 'c3', 'Label']
        train_df = pd.DataFrame({
            'c1': [2, 3, 4, 5],
            'c2': [3, 4, 5, 6],
            'c3': [4, 5, 6, 7],
            'Label': [0, 1, 2, 1]
        })

        pipeline = Pipeline([KMeansPlusPlus(n_clusters=5)])
        # With dry_run=True, fit's return value is parsed as JSON here.
        graph = json.loads(pipeline.fit(train_df, dry_run=True))
        nodes = graph['nodes']

        combiner = nodes[0]
        self.assertEqual(combiner["Name"], "Transforms.FeatureCombiner")

        # The order-insensitive assertion has a different name on Python 2.
        assert_same_items = (
            self.assertItemsEqual if six.PY2 else self.assertCountEqual)
        assert_same_items(combiner["Inputs"]["Features"], all_columns)

        trainer = nodes[1]
        self.assertEqual(trainer["Name"], "Trainers.KMeansPlusPlusClusterer")
        self.assertEqual(trainer["Inputs"]["FeatureColumnName"], "Features")

Пример #5

Показать файл

Файл: KMeansPlusPlus.py Проект: zyw400/NimbusML-1

# Load the 'infert' dataset from its file path as a FileDataStream.
path = get_dataset('infert').as_filepath()
data = FileDataStream.read_csv(path)

print(data.head())
#    age  case education  induced  parity ... row_num  spontaneous  ...
# 0   26     1    0-5yrs        1       6 ...       1            2  ...
# 1   42     1    0-5yrs        1       1 ...       2            0  ...
# 2   39     1    0-5yrs        2       6 ...       3            0  ...
# 3   34     1    0-5yrs        2       4 ...       4            0  ...
# 4   35     1   6-11yrs        1       3 ...       5            1  ...

# Training pipeline: the vectorizer is configured with
# {'edu': 'education'}, and 'edu' is then used as a clustering feature
# alongside 'induced' and 'parity'.
steps = [
    OneHotVectorizer(columns={'edu': 'education'}),
    KMeansPlusPlus(n_clusters=5, feature=['induced', 'edu', 'parity']),
]
pipeline = Pipeline(steps)

# Train on the full stream, then evaluate on the same stream.
# TODO: Replace with CV
metrics, predictions = (
    pipeline
    .fit(data)
    .test(data, 'induced', output_scores=True)
)

# Show the first predictions: the assigned cluster plus one score per cluster.
print(predictions.head())
#   PredictedLabel   Score.0   Score.1   Score.2   Score.3   Score.4
# 0               4  2.732253  2.667988  2.353899  2.339244  0.092014
# 1               4  2.269290  2.120064  2.102576  2.222578  0.300347
# 2               4  3.482253  3.253153  2.425328  2.269245  0.258680
# 3               4  3.130401  2.867317  2.158132  2.055911  0.175347

Пример #6

Показать файл

Файл: test_estimator_checks.py Проект: yazici/NimbusML

    'check_fit_score_takes_y', 'check_fit2d_predict1d', 'check_fit1d_1feature',
    'check_dont_overwrite_parameters', 'check_supervised_y_2d',
    'check_estimators_fit_returns_self', 'check_estimators_overwrite_params',
    'check_estimators_dtypes', 'check_classifiers_classes',
    'check_classifiers_train'
]

INSTANCES = {
    'EnsembleClassifier':
    EnsembleClassifier(num_models=3),
    'EnsembleRegressor':
    EnsembleRegressor(num_models=3),
    'FactorizationMachineBinaryClassifier':
    FactorizationMachineBinaryClassifier(shuffle=False),
    'KMeansPlusPlus':
    KMeansPlusPlus(n_clusters=2),
    'LightGbmBinaryClassifier':
    LightGbmBinaryClassifier(minimum_example_count_per_group=1,
                             minimum_example_count_per_leaf=1),
    'LightGbmClassifier':
    LightGbmClassifier(minimum_example_count_per_group=1,
                       minimum_example_count_per_leaf=1),
    'LightGbmRegressor':
    LightGbmRegressor(minimum_example_count_per_group=1,
                      minimum_example_count_per_leaf=1),
    'LightGbmRanker':
    LightGbmRanker(minimum_example_count_per_group=1,
                   minimum_example_count_per_leaf=1),
    'NGramFeaturizer':
    NGramFeaturizer(word_feature_extractor=n_gram()),
    'SgdBinaryClassifier':

Пример #7

Показать файл

Файл: test_export_to_onnx.py Проект: xadupre/NimbusML

     FromKey(columns=['Sepal_Length'])
 ]),
 # GlobalContrastRowScaler currently requires a vector input to work
 'GlobalContrastRowScaler': Pipeline([
     ColumnConcatenator() << {
         'concated_columns': [
             'Petal_Length',
             'Sepal_Width',
             'Sepal_Length']},
     GlobalContrastRowScaler(columns={'normed_columns': 'concated_columns'})
 ]),
 'Handler': Handler(replace_with='Mean', columns={'NewVals': 'Petal_Length'}),
 'IidSpikeDetector': IidSpikeDetector(columns=['Sepal_Length']),
 'IidChangePointDetector': IidChangePointDetector(columns=['Sepal_Length']),
 'Indicator': Indicator(columns={'Has_Nan': 'Petal_Length'}),
 'KMeansPlusPlus': KMeansPlusPlus(n_clusters=3, feature=['Sepal_Width', 'Sepal_Length']),
 'LightGbmRanker': LightGbmRanker(feature=['Class', 'dep_day', 'duration'],
                                  label='rank',
                                  group_id='group'),
 'Loader': Loader(columns={'ImgPath': 'Path'}),
 'LpScaler': Pipeline([
     ColumnConcatenator() << {
         'concated_columns': [
             'Petal_Length',
             'Sepal_Width',
             'Sepal_Length']},
     LpScaler(columns={'normed_columns': 'concated_columns'})
 ]),
 'MutualInformationSelector': Pipeline([
     ColumnConcatenator(columns={'Features': ['Sepal_Width', 'Sepal_Length', 'Petal_Width']}),
     MutualInformationSelector(

Пример #8

Показать файл

Файл: test_model_summary.py Проект: vdedyukhin/NimbusML

    FastLinearBinaryClassifier(),
    FastLinearClassifier(),
    FastLinearRegressor(),
    LogisticRegressionBinaryClassifier(),
    LogisticRegressionClassifier(),
    OnlineGradientDescentRegressor(),
    SgdBinaryClassifier(),
    # SymSgdBinaryClassifier(),
    OrdinaryLeastSquaresRegressor(),
    PoissonRegressionRegressor()
]

learners_not_supported = [
    NaiveBayesClassifier(),
    # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView
    KMeansPlusPlus(),
    # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView
    # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView
    FactorizationMachineBinaryClassifier(),
    PcaAnomalyDetector(),
    # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView
    # PcaTransformer(), # REVIEW: crashes
    GamBinaryClassifier(),
    # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView
    GamRegressor(
    ),  # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView
    LightGbmClassifier(),
    # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView
    # LightGbmRanker(), # REVIEW: crashes
    # fix in nimbusml, needs to implement ICanGetSummaryAsIDataView
    OneVsRestClassifier(FastLinearBinaryClassifier()),

Пример #9

Показать файл

Файл: KMeansPlusPlus_df.py Проект: zyw400/NimbusML-1

###############################################################################
# KMeansPlusPlus
import pandas
from nimbusml import Pipeline
from nimbusml.cluster import KMeansPlusPlus

# Training points: three groups whose x, y and z coordinates are identical,
# giving centroids (1,1,1), (11,11,11) and (-11,-11,-11).
train_coords = [0, 1, 2, 10, 11, 12, -10, -11, -12]
X_train = pandas.DataFrame(
    data={axis: list(train_coords) for axis in 'xyz'})

# Test points: each should clearly belong to just 1 of the 3 clusters.
test_coords = [-1, 3, 9, 13, -13, -20]
X_test = pandas.DataFrame(
    data={axis: list(test_coords) for axis in 'xyz'})

# Cluster ids supplied to pipe.test below, one per test point.
y_test = pandas.DataFrame(data={'clusterid': [2, 2, 1, 1, 0, 0]})

# Fit a 3-cluster model on the training frame only (no labels passed).
clusterer = KMeansPlusPlus(n_clusters=3)
pipe = Pipeline([clusterer]).fit(X_train)

metrics, predictions = pipe.test(X_test, y_test, output_scores=True)

# Show the first predictions.
print(predictions.head())

# Show the evaluation metrics.
print(metrics)