import pickle

import numpy as np
from sklearn.preprocessing import StandardScaler

## assumes `clean`, `drop_cols`, `data`, and `model` are defined earlier in the script
## select the rows to cluster and drop the non-feature columns
picked = clean[clean['oz'] == 1]
picked = drop_columns(picked, drop_cols)

## standardize
standardize = StandardScaler()
X, features = picked.values, picked.columns.values
X = standardize.fit_transform(X)

## build model (a single run at k = 6; widen the range to sweep more values of k)
for k in range(6, 7):
    pax = Clusterer(model, n_clusters=k, random_state=24)
    centers = pax.fit(X)
    pax.store_features(features)
    print("{} grouped the data into {} clusters.".format(model, np.shape(centers)[0]))

    ## update labels and scores for column k
    filepath = "{}/{}/labels.pkl".format(data, model)
    with open(filepath, "rb") as f:
        model_labels_df = pickle.load(f)
    k = pax.attributes['n_clusters']
    model_labels_df["k={}".format(k)] = pax.attributes['labels_']
    model_labels_df["k{}silhouette_score".format(k)] = pax.get_silhouette_samples()
    model_labels_df.to_pickle(filepath)
    print("Updated labels @ {}".format(filepath))

### !
filepath = "{}/{}/estimator.pkl".format(data, model)
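
## ---------------------------------------------------------------------------
## The snippet above relies on a project-level `drop_columns` helper and a
## `Clusterer` wrapper that are not defined here (they would normally live in
## a utilities module imported at the top). Below is a minimal, hypothetical
## sketch of what they could look like, assuming the wrapper delegates to
## scikit-learn's KMeans; the project's real implementations may differ.
## ---------------------------------------------------------------------------
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples


def drop_columns(df, cols):
    """Return a copy of df with the given columns removed (missing ones ignored)."""
    return df.drop(columns=[c for c in cols if c in df.columns])


class Clusterer:
    """Thin wrapper around a scikit-learn clustering estimator (sketch only).

    Only the KMeans case is shown; the real class presumably dispatches on
    the `model` name passed in.
    """

    def __init__(self, model, n_clusters, random_state=None):
        self.model = model
        self.estimator = KMeans(n_clusters=n_clusters,
                                random_state=random_state, n_init=10)
        self._X = None
        self.features = None

    def fit(self, X):
        """Fit the estimator and return its cluster centers."""
        self._X = X
        self.estimator.fit(X)
        return self.estimator.cluster_centers_

    def store_features(self, features):
        """Remember the feature names that produced X."""
        self.features = list(features)

    @property
    def attributes(self):
        """Expose fitted attributes in the dict form the snippet above reads."""
        return {
            "n_clusters": self.estimator.n_clusters,
            "labels_": self.estimator.labels_,
        }

    def get_silhouette_samples(self):
        """Per-sample silhouette coefficients for the fitted labels."""
        return silhouette_samples(self._X, self.estimator.labels_)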