def compare_cluster_runtime(data, n_clusters, n_components):
    """Time clustering on the original data and on dimensionality-reduced data.

    Fits KMeans and a Gaussian mixture model on ``data[0]``, runs the four
    reducers (``pca_eval``, ``rand_projections``, ``ica_eval``, ``trunk_svd``)
    on ``data``, then fits both clusterers again on the reduced output.

    Args:
        data: Indexable whose element 0 is used as the feature matrix; the
            whole object is also passed to each reducer's ``transform``.
        n_clusters: Number of KMeans clusters and of mixture components.
        n_components: Passed as ``n_components`` to every reducer.

    Returns:
        pandas.Series named ``'time'`` with eight entries. The clustering
        entries are differences of ``time()`` calls; the 'PCA', 'RCA',
        'ICA' and 'LSA' entries are the elapsed values returned by the
        corresponding reducer's ``transform``.
    """

    def _elapsed(run):
        # Wall-clock duration of a single call to ``run``.
        started = time()
        run()
        return time() - started

    # ``mixture.GMM`` was removed in scikit-learn 0.20. Keep using it where it
    # still exists (unchanged behavior on old installs) and fall back to its
    # replacement otherwise, so the benchmark does not raise AttributeError.
    gmm_cls = getattr(mixture, 'GMM', None) or mixture.GaussianMixture

    # Extract the features before starting the timer so the lookup is not
    # counted as part of the KMeans measurement.
    features = data[0]

    kmeans_original = _elapsed(
        lambda: KMeans(n_clusters=n_clusters).fit_transform(features))
    gmm_original = _elapsed(
        lambda: gmm_cls(n_components=n_clusters).fit(features))

    # Only the elapsed value is needed from the first three reducers; their
    # transformed output is discarded immediately.
    _, pca_elapsed = pca_eval.transform(data, n_components=n_components)
    _, rca_elapsed = rand_projections.transform(data, n_components=n_components)
    _, ica_elapsed = ica_eval.transform(data, n_components=n_components)
    # NOTE(review): as in the original code, only the trunk_svd (LSA) output
    # is clustered below -- confirm that this is the intended reduced data set.
    reduced_higgs_data, lsa_elapsed = trunk_svd.transform(
        data, n_components=n_components)

    kmeans_reduced = _elapsed(
        lambda: KMeans(n_clusters=n_clusters).fit_transform(reduced_higgs_data))
    gmm_reduced = _elapsed(
        lambda: gmm_cls(n_components=n_clusters).fit(reduced_higgs_data))

    return pd.Series(
        [kmeans_original, gmm_original,
         pca_elapsed, rca_elapsed, ica_elapsed, lsa_elapsed,
         kmeans_reduced, gmm_reduced],
        index=['original Kmeans clustering',
               'original GMM clustering',
               'PCA', 'RCA', 'ICA', 'LSA',
               'reduced Kmeans clustering',
               'reduced GMM clustering'],
        name='time')
def run_higg_dimensionality_reduction(higgs_data, n_components):
    """Run each dimensionality reducer on the data and collect its results.

    Calls ``transform(higgs_data, n_components=n_components)`` on
    ``pca_eval``, ``rand_projections``, ``ica_eval`` and ``trunk_svd``,
    in that order.

    Returns:
        A 2-tuple: a dict mapping 'PCA', 'RCA', 'ICA' and 'LSA' to the
        transformed data from each reducer, and a pandas.Series named
        ``'transformation_time'`` holding the elapsed value each reducer
        reported, indexed by the same labels.
    """
    reducers = (
        ('PCA', pca_eval),
        ('RCA', rand_projections),
        ('ICA', ica_eval),
        ('LSA', trunk_svd),
    )

    transformed = {}
    labels = []
    timings = []
    for label, reducer in reducers:
        # Each transform returns a (transformed_data, elapsed) pair.
        transformed[label], elapsed = reducer.transform(
            higgs_data, n_components=n_components)
        labels.append(label)
        timings.append(elapsed)

    transformation_time = pd.Series(timings, index=labels,
                                    name='transformation_time')
    return transformed, transformation_time
# Example no. 3
# 0
def run_higg_dimensionality_reduction(higgs_data, n_components):
    """Apply the PCA, RCA, ICA and LSA reducers and report their timings.

    Each of ``pca_eval``, ``rand_projections``, ``ica_eval`` and
    ``trunk_svd`` is called once, in that order, as
    ``transform(higgs_data, n_components=n_components)``.

    Returns:
        ``(reduced, transformation_time)``: ``reduced`` is a dict keyed by
        'PCA', 'RCA', 'ICA' and 'LSA' holding each reducer's transformed
        data; ``transformation_time`` is a pandas.Series named
        ``'transformation_time'`` with the elapsed value each reducer
        returned, under the same labels.
    """
    names = ['PCA', 'RCA', 'ICA', 'LSA']
    modules = [pca_eval, rand_projections, ica_eval, trunk_svd]

    # Run every reducer first; each call yields a (data, elapsed) pair.
    outcomes = [
        module.transform(higgs_data, n_components=n_components)
        for module in modules
    ]

    reduced = {name: trns for name, (trns, _) in zip(names, outcomes)}
    elapsed_values = [elapsed for _, elapsed in outcomes]

    transformation_time = pd.Series(elapsed_values,
                                    index=names,
                                    name='transformation_time')
    return reduced, transformation_time