示例#1
0
def vanDongenSpectral(args):
    """Run one scale -> UMAP -> KMeans configuration and score it.

    Args:
        args: sequence of exactly seven items, unpacked in this order:
            ``(neighbors, min_d, components, metric, dataset, scaler, k)``.
            ``scaler`` and ``dataset`` index the module-level ``scalers``
            and ``datasets`` lookups; ``neighbors``, ``min_d``,
            ``components`` and ``metric`` are forwarded to ``UMAP``;
            ``k`` is the number of KMeans clusters.

    Returns:
        The value of ``vanDongen`` applied to the contingency table of
        ``metadata.genre`` against the KMeans cluster labels.
    """
    neighbors, min_d, components, metric, dataset, scaler, k = args

    # Report which configuration is being evaluated
    print(', '.join((
        dataset,
        metric,
        scaler,
        'n_components=' + str(components),
        'n_neighbors=' + str(neighbors),
        'min_dist=' + str(min_d),
        'k=' + str(k),
    )))

    # Standardize the dataset with the selected scaler
    standardize = scalers[scaler].fit_transform
    scaled = standardize(datasets[dataset])

    # Project the standardized data with UMAP
    umap_params = {
        'n_components': components,
        'n_neighbors': neighbors,
        'min_dist': min_d,
        'metric': metric,
    }
    reducer = UMAP(**umap_params)
    embedding = reducer.fit_transform(scaled)

    # Cluster the embedding with KMeans (fixed seed)
    clusterer = KMeans(n_clusters=k, random_state=0)
    clusterer.fit(embedding)

    # Build the confusion matrix: genre versus assigned cluster label
    assignments = pd.DataFrame({
        'Generos': metadata.genre,
        'data': clusterer.labels_,
    })
    confusion = pd.crosstab(assignments['Generos'], assignments['data'])

    return vanDongen(confusion)
示例#2
0
                            # Compute the internal validation measures for
                            # this embedding and cluster count
                            sil = get_silhouette_avg(embedding, k)
                            sse = get_sse(embedding, k)

                            # Fit KMeans on the embedding (fixed random_state)
                            km = KMeans(n_clusters=k,
                                        random_state=0).fit(embedding)

                            # Build the confusion matrix: genre versus
                            # assigned cluster label
                            tmp = pd.DataFrame({
                                'Generos': metadata.genre,
                                'data': km.labels_
                            })
                            ct = pd.crosstab(tmp['Generos'], tmp['data'])

                            # Compute the external validation measures
                            # (vanDongen on the confusion matrix, adjusted
                            # Rand score on genre vs. cluster labels)
                            vd = vanDongen(ct)
                            rand = adjusted_rand_score(metadata.genre,
                                                       km.labels_)

                            # Store the parameters and scores of this run
                            results.append([
                                dataset, metric, scaler, components, min_d,
                                neighbors, k, sil, sse, vd, rand
                            ])

# Gather the collected result rows into a single table
df_results = pd.DataFrame(results, columns=columns)
# Bare expression: presumably kept for notebook display of the table;
# it has no effect when run as a plain script
df_results

# Write the table to CSV; the file name is built from the first entry of
# `metrics`, and the DataFrame index is not written
df_results.to_csv('spectral_' + metrics[0] + '.csv', index=False)