plot.multiple_line_chart(axs[0, 1], n_clusters, svalues,
                         '\nKmeans', 'nr clusters', 'silhouette', percentage=False)
plt.show()

#%% KMeans on the two features kept by each reduction algorithm
n_clusters = 5
algs = ["PCA", "selectkbest"]

plt.figure()
fig, axs = plt.subplots(2, len(algs), figsize=(14, 8), squeeze=False)
for a in range(len(algs)):
    datar = datapp.feature_reduction(df, "class", ["class", "id"],
                                     n_features=2, as_int=True, alg=algs[a])
    y: np.ndarray = datar[to_clf].values
    X: np.ndarray = datar.drop([to_clf, "id"], axis=1).values

    kmeans_model = cluster.KMeans(n_clusters=n_clusters, random_state=rs).fit(X)
    labels = kmeans_model.labels_
    cluster_centers = kmeans_model.cluster_centers_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)

    # plot
    # plt.clf()
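#%% (hedged sketch) The chart calls below use `ivalues`, `svalues` and `sil`,
# which are not defined in this fragment. This cell is a minimal reconstruction,
# assuming an elbow-style sweep over candidate cluster counts; the candidate
# range, the 1x2 axes layout and `svalues = sil` are assumptions.
from sklearn.metrics import silhouette_score

n_clusters = [2, 3, 4, 5, 6, 7, 8, 9]    # candidate numbers of clusters (assumed)
ivalues = []                             # KMeans inertia for each candidate k
sil = []                                 # mean silhouette score for each candidate k
for k in n_clusters:
    km = cluster.KMeans(n_clusters=k, random_state=rs).fit(X)
    ivalues.append(km.inertia_)
    sil.append(silhouette_score(X, km.labels_))
svalues = sil

fig, axs = plt.subplots(1, 2, figsize=(14, 7), squeeze=False)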
print(sil[4])
plot.multiple_line_chart(axs[0, 0], n_clusters, ivalues,
                         '\nKmeans', 'nr clusters', 'inertia', percentage=False)
plot.multiple_line_chart(axs[0, 1], n_clusters, svalues,
                         '\nKmeans', 'nr clusters', 'silhouette', percentage=False)
plt.show()

#%% KMeans clusters on the two features kept by each reduction algorithm
n_clusters = 6
algs = ["PCA", "selectkbest"]

plt.figure()
fig, axs = plt.subplots(2, len(algs), figsize=(14, 8), squeeze=False)
for a in range(len(algs)):
    datar = datapp.feature_reduction(df, to_clf, categoric + [to_clf],
                                     n_features=2, as_int=True, alg=algs[a])
    y: np.ndarray = datar[to_clf].values
    X: np.ndarray = datar.drop([to_clf], axis=1).values

    kmeans_model = cluster.KMeans(n_clusters=n_clusters, random_state=rs).fit(X)
    labels = kmeans_model.labels_
    cluster_centers = kmeans_model.cluster_centers_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)

    # plot
    # plt.clf()
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(n_clusters_), colors):
        my_members = labels == k
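        # (hedged sketch) the rest of this plotting loop is assumed: scatter the
        # members of cluster k and mark its centroid; axs[1, a] as the target
        # panel and the title below are assumptions, not taken from the source.
        axs[1, a].plot(X[my_members, 0], X[my_members, 1], col + '.')
        axs[1, a].plot(cluster_centers[k][0], cluster_centers[k][1], 'o',
                       markerfacecolor=col, markeredgecolor='k', markersize=10)
    axs[1, a].set_title('KMeans with %s (%d clusters)' % (algs[a], n_clusters_))
plt.show()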
#%% Correlation-reduction threshold vs. fraction of features kept
thresholds = [1, 0.95, 0.90, 0.8]
selects = [1, 0.9, 0.8, 0.75, 0.6]
algs = ["selectkbest", "PCA"]

plt.figure()
fig, axs = plt.subplots(2, 2, figsize=(12, 7), squeeze=False)
for k in range(len(algs)):
    f = algs[k]
    values = {}
    svalues = {}
    for d in selects:
        yvalues = []
        syvalues = []
        for tr in thresholds:
            datared = datapp.preprocess_alt(data, "class", red_corr=True, tr=tr, n=5,
                                            normalization=normalization,
                                            ignore_classes=categoric, as_df=True)
            df = datapp.feature_reduction(datared, "class", ["class", "id"], d, alg=f)
            rf = RandomForestClassifier(random_state=rs)
            acc, sens, _ = eval.train_predict_kfold(df, "class", rf, bal=bal)
            yvalues.append(acc)
            syvalues.append(sens)
        values[d] = yvalues
        svalues[d] = syvalues
    plot.multiple_line_chart(axs[0, k], thresholds, values,
                             'Random Forests with %s reduction' % f,
                             'threshold of reduction', 'accuracy')
    plot.multiple_line_chart(axs[1, k], thresholds, svalues,
                             'Random Forests with %s reduction' % f,
                             'threshold of reduction', 'sensitivity', percentage=False)
plt.show()

#%%
tr = 0.95
f = "selectkbest"
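# (hedged sketch) presumably the chosen configuration is then applied end-to-end:
# preprocess with the selected correlation threshold, reduce with selectkbest and
# evaluate a RandomForest, reusing the calls from the sweep above. The kept
# fraction of features (0.8) and the printed summary line are assumptions.
datared = datapp.preprocess_alt(data, "class", red_corr=True, tr=tr, n=5,
                                normalization=normalization,
                                ignore_classes=categoric, as_df=True)
df = datapp.feature_reduction(datared, "class", ["class", "id"], 0.8, alg=f)
rf = RandomForestClassifier(random_state=rs)
acc, sens, _ = eval.train_predict_kfold(df, "class", rf, bal=bal)
print("accuracy: %.3f | sensitivity: %.3f" % (acc, sens))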