Пример #1
0
def main(args):
    """Sweep several t-SNE learning rates and print statistics for each run.

    Reads ``i_path``, ``threshold``, ``mode`` and ``resolution`` from
    ``args``, loads the artists data, builds the dataset, removes outliers
    and normalizes it. ``tsne`` is then called once for each learning rate
    in (10, 100, 500, 1000), and the column-wise min, max, mean and variance
    of every result are printed.
    """
    pkl_path = args.i_path
    outlier_thresh = args.threshold
    mode = args.mode
    # NOTE(review): read from args but not used in the visible code.
    intervals = args.resolution

    print('LOADING PKL...')
    artists = load_data(filename=pkl_path)

    print('PREPROCESSING ', d[mode])

    X, y = gen_dataset(artists=artists, mode=mode)
    X, y = remove_outlier(X=X, y=y, thresh=outlier_thresh)
    X = normalize(X=X)

    # (label, reducer) pairs, printed in this order for every embedding.
    summaries = (
        ('min values', np.amin),
        ('max values', np.amax),
        ('mean values', np.mean),
        ('variance values', np.var),
    )

    for lr in (10, 100, 500, 1000):
        print('TSNE with learning rate =', lr)
        X_emb = tsne(X, lr=lr)

        print('[TSNE-1 - TSNE-2]')
        for label, reducer in summaries:
            print(label)
            print(reducer(X_emb, axis=0))

    # The optimize_artists_dictionary / attach_tsne_to_art_dict steps are
    # commented out in this variant of the script.

    # NOTE(review): these bounds are taken from the normalized X, not from
    # any X_emb produced in the loop above -- confirm this is intended.
    tsne_min = np.amin(X, axis=0)
    tsne_max = np.amax(X, axis=0)
Пример #2
0
def main(args):
    """Run the preprocessing + t-SNE pipeline and print embedding statistics.

    Reads ``i_path``, ``threshold`` and ``mode`` from ``args``. The artists
    data is loaded, turned into a dataset, stripped of outliers, normalized
    and passed to ``tsne`` with ``lr=1000``. The result is attached to the
    artists dictionary, its column-wise min, max, mean and variance are
    printed, and finally ``clean_similar_artists`` is applied.
    """
    pkl_path = args.i_path
    outlier_thresh = args.threshold
    mode = args.mode

    print('LOADING PKL...')
    artists = load_data(filename=pkl_path)

    print('PREPROCESSING ', d[mode])

    X, y = gen_dataset(artists=artists, mode=mode)
    X, y = remove_outlier(X=X, y=y, thresh=outlier_thresh)
    X = normalize(X=X)

    print('TSNE')
    # From here on X holds whatever tsne() returns for the normalized data.
    X = tsne(X=X, lr=1000)

    artists = optimize_artists_dictionary(artists)
    artists = attach_tsne_to_art_dict(artists=artists, X=X, y=y)

    # NOTE(review): not used again in the visible code.
    tsne_min = np.amin(X, axis=0)
    tsne_max = np.amax(X, axis=0)

    print('[TSNE-1 - TSNE-2]')
    # (label, reducer) pairs, printed in this order.
    summaries = (
        ('min values', np.amin),
        ('max values', np.amax),
        ('mean values', np.mean),
        ('variance values', np.var),
    )
    for label, reducer in summaries:
        print(label)
        print(reducer(X, axis=0))

    artists = clean_similar_artists(artists=artists)
Пример #3
0
def main(args):
    """Load the artists data and run outlier removal over a range of thresholds.

    Reads ``i_path`` and ``o_path`` from ``args``. The module-level globals
    ``output_path`` (normalized to end with '/') and ``artists`` are set as a
    side effect. The dataset is generated with ``mode=3`` and
    ``remove_outlier`` is called once for every threshold produced by
    ``np.arange(1, 3, 0.2)``.
    """
    global output_path
    global artists

    input_folder = args.i_path

    output_path = args.o_path
    # endswith() instead of output_path[-1]: indexing an empty string raises
    # IndexError. An empty path is left unchanged rather than turned into '/'.
    # NOTE(review): presumably '' then means "relative to the current
    # directory" for the code that consumes output_path -- confirm.
    if output_path and not output_path.endswith('/'):
        output_path += '/'

    print('LOADING PKL...')
    artists = load_data(filename=input_folder)

    print('PREPROCESSING')

    X, y = gen_dataset(artists=artists, mode=3)

    # Thresholds start at 1 and advance by 0.2, stopping before 3. The step is
    # a float, so the generated values are subject to rounding.
    for t in np.arange(1, 3, 0.2):
        # NOTE(review): A and b are not used in the visible code; the original
        # X and y are deliberately not overwritten between iterations.
        A, b = remove_outlier(X=X, y=y, thresh=t)
Пример #4
0
def main(args):
    """Run the full pipeline: preprocess, t-SNE, heatmaps, save and plot.

    Reads ``i_path``, ``threshold``, ``output_pkl``, ``o_path`` and ``mode``
    from ``args``. The module-level globals ``output_path`` (normalized to end
    with '/') and ``artists`` are set as a side effect.

    Steps, in order: load the artists data, build the dataset, remove
    outliers, normalize, call ``tsne`` with ``lr=1000``, attach the result to
    the artists dictionary, print column-wise statistics, clean similar
    artists, generate heatmaps, save the artists data to ``output_pkl`` and
    plot the heatmaps.
    """
    global output_path
    global artists

    input_folder = args.i_path
    threshold = args.threshold
    output_pkl = args.output_pkl
    mode = args.mode

    output_path = args.o_path
    # endswith() instead of output_path[-1]: indexing an empty string raises
    # IndexError. An empty path is left unchanged rather than turned into '/'.
    # NOTE(review): presumably '' then means "relative to the current
    # directory" for the code that consumes output_path -- confirm.
    if output_path and not output_path.endswith('/'):
        output_path += '/'

    print('LOADING PKL...')
    artists = load_data(filename=input_folder)

    print('PREPROCESSING ', d[mode])
    X, y = gen_dataset(artists=artists, mode=mode)
    X, y = remove_outlier(X=X, y=y, thresh=threshold)
    X = normalize(X=X)

    print('TSNE')
    X = tsne(X=X, lr=1000)
    artists = optimize_artists_dictionary(artists)
    artists = attach_tsne_to_art_dict(artists=artists, X=X, y=y)

    # Named tsne_min / tsne_max so the builtins min() and max() are not
    # shadowed; the helpers below still receive them as min= / max= keywords.
    tsne_min = np.amin(X, axis=0)
    tsne_max = np.amax(X, axis=0)
    dimension = 20

    print('[TSNE-1 - TSNE-2]')
    print('min values')
    print(tsne_min)
    print('max values')
    print(tsne_max)
    print('mean values')
    print(np.mean(X, axis=0))
    print('variance values')
    print(np.var(X, axis=0))

    artists = clean_similar_artists(artists=artists)

    print('GENERATE HEATMAPS')
    # NOTE(review): the heatmap helpers take neither artists nor an output
    # directory, so they presumably read the globals set above -- confirm.
    gen_heatmaps_master(dimension=dimension, min=tsne_min, max=tsne_max)

    print('SAVING DATA')
    save_data(artists, filename=output_pkl)

    print('PLOT HEATMAPS in ', output_path)
    plot_heatmaps_master(dimension=dimension, min=tsne_min, max=tsne_max)
Пример #5
0
def _save_feature_histograms(X, feat_names, out_dir, title_suffix):
    """Save one 200-bin histogram PNG per column of ``X`` into ``out_dir``.

    Args:
        X: data converted with ``np.array`` and indexed as ``x[:, i]``, so it
            must convert to an array with at least two dimensions.
        feat_names: object indexed by column number that yields the feature
            name used for both the file name and the plot title.
        out_dir: destination directory as a string ending with '/'; it is
            created if it does not exist.
        title_suffix: text appended directly to the feature name to form the
            plot title.
    """
    import os

    # plt.savefig does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    x = np.array(X)
    for i in range(x.shape[1]):
        name = feat_names[i]
        plt.hist(x[:, i], bins=200)
        plt.title(name + title_suffix)
        plt.savefig(out_dir + name + '.png')
        # Close the figure so the next feature starts from an empty plot.
        plt.close('all')


def main(args):
    """Remove outliers from the dataset, optionally plotting distributions.

    Reads ``i_path``, ``threshold`` and ``o_path`` from ``args``. When
    ``PRINT_DISTRIBUTION`` is truthy, per-feature histograms are written to
    ``<o_path>/BEFORE/`` before outlier removal and to ``<o_path>/AFTER/``
    afterwards.
    """
    input_folder = args.i_path
    threshold = args.threshold
    artists = load_data(filename=input_folder)

    X, y = gen_dataset(artists=artists)

    if PRINT_DISTRIBUTION:
        # Distributions before outlier removal.
        feat_names = get_features_dict()
        _save_feature_histograms(X, feat_names,
                                 args.o_path + '/BEFORE/',
                                 'BEFORE outlier remotion')

    X, y = remove_outlier(X=X,
                          y=y,
                          thresh=threshold,
                          verbose=False,
                          save_histogram=True)
    # normalize() and remove_outliers_lof() are deliberately not applied here;
    # both calls were already commented out in the original.

    if PRINT_DISTRIBUTION:
        # Distributions after outlier removal; feat_names was fetched above
        # under the same PRINT_DISTRIBUTION condition.
        _save_feature_histograms(X, feat_names,
                                 args.o_path + '/AFTER/',
                                 'AFTER outlier remotion')