import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from umap import UMAP


def vanDongenSpectral(args):
    neighbors, min_d, components, metric, dataset, scaler, k = args
    print(dataset + ', ' + metric + ', ' + scaler
          + ', n_components=' + str(components)
          + ', n_neighbors=' + str(neighbors)
          + ', min_dist=' + str(min_d) + ', k=' + str(k))
    # Standardize the dataset with the corresponding scaler
    df = scalers[scaler].fit_transform(datasets[dataset])
    # Apply UMAP
    um = UMAP(n_components=components, n_neighbors=neighbors,
              min_dist=min_d, metric=metric)
    embedding = um.fit_transform(df)
    # Apply KMeans to the embedding
    km = KMeans(n_clusters=k, random_state=0).fit(embedding)
    # Build the confusion matrix of genres vs. cluster labels
    tmp = pd.DataFrame({'Generos': metadata.genre, 'data': km.labels_})
    ct = pd.crosstab(tmp['Generos'], tmp['data'])
    return vanDongen(ct)
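# `vanDongen`, `get_silhouette_avg`, and `get_sse` are referenced in this
# snippet but not defined here. Minimal sketches follow, assuming the
# unnormalized van Dongen criterion on a pandas crosstab (lower = better
# agreement between the two partitions) and KMeans-based internal validation
# with the same random_state used elsewhere; these are illustrative
# reconstructions, not the original helpers.
from sklearn.metrics import silhouette_score


def vanDongen(ct):
    # VD(C, C') = 2n - sum of row maxima - sum of column maxima,
    # where n is the total count in the contingency table
    m = ct.to_numpy()
    n = m.sum()
    return 2 * n - m.max(axis=1).sum() - m.max(axis=0).sum()


def get_silhouette_avg(embedding, k):
    # Mean silhouette coefficient of a k-cluster KMeans fit on the embedding
    labels = KMeans(n_clusters=k, random_state=0).fit_predict(embedding)
    return silhouette_score(embedding, labels)


def get_sse(embedding, k):
    # Within-cluster sum of squared errors (KMeans inertia) for k clusters
    return KMeans(n_clusters=k, random_state=0).fit(embedding).inertia_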
# Compute the internal validation metrics
sil = get_silhouette_avg(embedding, k)
sse = get_sse(embedding, k)
# Apply KMeans
km = KMeans(n_clusters=k, random_state=0).fit(embedding)
# Build the confusion matrix of genres vs. cluster labels
tmp = pd.DataFrame({'Generos': metadata.genre, 'data': km.labels_})
ct = pd.crosstab(tmp['Generos'], tmp['data'])
# Compute the external validation metrics
vd = vanDongen(ct)
rand = adjusted_rand_score(metadata.genre, km.labels_)
# Store the results of this run
results.append([dataset, metric, scaler, components, min_d,
                neighbors, k, sil, sse, vd, rand])

df_results = pd.DataFrame(results, columns=columns)
df_results
df_results.to_csv('spectral_' + metrics[0] + '.csv', index=False)
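# The single-tuple signature of vanDongenSpectral suggests it is meant to be
# mapped over a parameter grid, e.g. with multiprocessing. A hypothetical
# driver sketch; the grid values below are illustrative, not taken from the
# original run:
from itertools import product
from multiprocessing import Pool

param_grid = list(product(
    [15, 30],        # n_neighbors
    [0.0, 0.1],      # min_dist
    [2, 10],         # n_components
    metrics,         # distance metrics
    list(datasets),  # dataset keys
    list(scalers),   # scaler keys
    [5, 10],         # k for KMeans
))

if __name__ == '__main__':
    with Pool() as pool:
        vd_scores = pool.map(vanDongenSpectral, param_grid)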