def main(args):
    """Compute the ranking for one chunk file and pickle the result.

    Reads ``args.i_path`` (artists file), ``args.i_chunk`` (chunk file) and
    ``args.output_path`` (output directory). The loaded artists and chunk are
    published as the module-level globals ``artists`` and ``chunk``;
    ``compute_ranking_master()`` is called without arguments, so it
    presumably reads those globals -- confirm against its definition.

    The result is saved as ``<output dir>/<chunk basename>_OUT.pkl``. Note
    that the global ``output_path`` holds that full file path once this
    function returns.
    """
    global output_path, artists, chunk

    artists_filename = args.i_path
    chunk_filename = args.i_chunk

    # Normalise the output directory so it always ends with a slash.
    output_path = args.output_path
    if not output_path[-1] == '/':
        output_path = output_path + '/'

    print('LOADING PKL...', end='')
    artists = load_data(filename=artists_filename)
    print('DONE')

    print('LOADING CHUNKS...')
    chunk = load_data(filename=chunk_filename)
    print('DONE')

    print('COMPUTE RANKING of selection ', chunk_filename)
    chunk_level_ranking = compute_ranking_master()

    # The output file is named after the chunk file it was computed from.
    output_path = output_path + os.path.basename(chunk_filename) + '_OUT.pkl'
    save_data(chunk_level_ranking, filename=output_path)
Exemplo n.º 2
0
def main(args):
    """Merge per-chunk output pickles into one DataFrame and save it.

    Loads ``chunk_<i>_OUT.pkl`` for every ``i`` in ``range(args.n_chunks)``
    from ``args.chunk_folder`` and merges their key/value pairs into a single
    dictionary (on duplicate keys the later chunk wins). The merged dictionary
    is converted with ``pd.DataFrame.from_dict`` and saved as
    ``merged_OUT.pkl`` in the same folder.

    Memory usage is printed after each chunk, around the explicit garbage
    collection, and after saving. The printed value is
    ``getCurrentMemoryUsage() / 2**20`` and is labelled GB -- presumably the
    helper reports KiB; confirm against its definition.

    Args:
        args: object exposing ``n_chunks`` (passed to ``range``) and
            ``chunk_folder`` (string path; an empty string is treated as the
            current directory).
    """
    n_chunks = args.n_chunks
    chunk_folder = args.chunk_folder
    # Guard on emptiness first: indexing an empty string raises IndexError,
    # and blindly appending '/' would redirect everything to the root.
    if chunk_folder and not chunk_folder.endswith('/'):
        chunk_folder += '/'

    # Group all chunk-level rankings into a single dictionary.
    dictionary = dict()
    for i in range(n_chunks):
        chunk_pathname = chunk_folder + 'chunk_' + str(i) + '_OUT.pkl'
        chunk_out = load_data(filename=chunk_pathname)

        dictionary.update(chunk_out)
        # Drop the reference right away so the chunk can be reclaimed before
        # the next one is loaded.
        del chunk_out

        print('chunk ', str(i), 'Memory (GB) : ',
              getCurrentMemoryUsage() / (2**20))

    final_pathname = chunk_folder + 'merged_OUT.pkl'
    print('before gc Memory (GB) : ', getCurrentMemoryUsage() / (2**20))
    gc.collect()
    print('after gc Memory (GB) : ', getCurrentMemoryUsage() / (2**20))

    df = pd.DataFrame.from_dict(dictionary)
    save_data(dict=df, filename=final_pathname)
    # This report used to reuse the loop variable ``i``, which mislabelled it
    # as a chunk and raised NameError when ``n_chunks`` was 0.
    print('after save Memory (GB) : ', getCurrentMemoryUsage() / (2**20))
Exemplo n.º 3
0
def main(args):
    """Build the max-length ranking from a distances file and save it.

    Loads ``args.distances``, passes the loaded object to
    ``build_max_length_ranking`` and stores the result as
    ``max_length_ranking_<args.note>.pkl`` in the directory that contains the
    distances file.
    """
    source_file = args.distances
    note = args.note

    loaded_distances = load_data(filename=source_file)
    ranking = build_max_length_ranking(distances=loaded_distances)

    # Write next to the input file; ``note`` distinguishes different runs.
    target = os.path.join(
        os.path.dirname(source_file),
        'max_length_ranking_' + note + '.pkl',
    )
    save_data(filename=target, dict=ranking)
Exemplo n.º 4
0
def main(args):
    """Build the artist dictionary from a folder and save it.

    Calls ``retrieve_artist_dict`` with ``args.i_path`` as ``basedir`` and
    saves the result to ``args.o_path`` joined with ``args.o_name``,
    inserting a ``/`` between them only when ``args.o_path`` does not already
    end with one.
    """
    input_folder = args.i_path

    out_dir = args.o_path
    # Only add a separator when the directory does not already end with one.
    separator = '' if out_dir[-1] == '/' else '/'
    output_filename = out_dir + separator + args.o_name

    save_data(dict=retrieve_artist_dict(basedir=input_folder),
              filename=output_filename)
Exemplo n.º 5
0
def main(args):
    """Build the matrix for one chunk and save it.

    Publishes ``args.metric`` and the data loaded from ``args.input_pkl`` as
    the module-level globals ``metric`` and ``artists``. It then loads the
    chunk at ``args.input_chunk``, passes it to ``build_matrix_master`` and
    saves the result to ``args.output_path``.

    ``build_matrix_master`` only receives the chunk, so it presumably reads
    the two globals set here -- confirm against its definition.
    """
    global metric, artists

    pkl_path = args.input_pkl
    destination = args.output_path
    metric = args.metric
    artists = load_data(pkl_path)

    chunk_path = args.input_chunk
    print('LOADING CHUNKS...')
    chunk_data = load_data(filename=chunk_path)
    print('DONE')

    matrix = build_matrix_master(chunk=chunk_data)
    save_data(filename=destination, dict=matrix)
Exemplo n.º 6
0
def main(args):
    """Run the t-SNE / heatmap pipeline on a pickled artist dictionary.

    Steps, in order:
      1. Load the artists from ``args.i_path`` into the global ``artists``.
      2. Generate a dataset for ``args.mode``, remove outliers using
         ``args.threshold`` and normalise it.
      3. Project it with ``tsne`` and attach the result to the artists.
      4. Print per-column min / max / mean / variance of the projection.
      5. Clean similar artists, generate heatmaps, and save the artists to
         ``args.output_pkl``.
      6. Plot the heatmaps.

    The global ``output_path`` is set to ``args.o_path`` with a trailing
    slash. ``gen_heatmaps_master`` and ``plot_heatmaps_master`` do not receive
    the artists or the output directory as arguments, so they presumably read
    those globals -- confirm against their definitions.
    """
    global output_path, artists

    input_folder = args.i_path
    threshold = args.threshold
    output_pkl = args.output_pkl

    output_path = args.o_path
    # Guard on emptiness first: indexing an empty string raises IndexError,
    # and blindly appending '/' would redirect everything to the root.
    if output_path and not output_path.endswith('/'):
        output_path += '/'

    mode = args.mode
    print('LOADING PKL...')
    artists = load_data(filename=input_folder)

    # ``d`` is not defined in this function; it is expected to be a
    # module-level mapping from mode to a printable label.
    print('PREPROCESSING ', d[mode])
    X, y = gen_dataset(artists=artists, mode=mode)
    X, y = remove_outlier(X=X, y=y, thresh=threshold)
    X = normalize(X=X)

    print('TSNE')
    X = tsne(X=X, lr=1000)
    artists = optimize_artists_dictionary(artists)
    artists = attach_tsne_to_art_dict(artists=artists, X=X, y=y)

    # Column-wise extrema, computed once and reused for both the report and
    # the heatmap helpers. Named so they do not shadow the builtins.
    min_values = np.amin(X, axis=0)
    max_values = np.amax(X, axis=0)
    dimension = 20

    print('[TSNE-1 - TSNE-2]')
    print('min values')
    print(min_values)
    print('max values')
    print(max_values)
    print('mean values')
    print(np.mean(X, axis=0))
    print('variance values')
    print(np.var(X, axis=0))

    artists = clean_similar_artists(artists=artists)
    print('GENERATE HEATMAPS')
    gen_heatmaps_master(dimension=dimension, min=min_values, max=max_values)
    print('SAVING DATA')
    save_data(artists, filename=output_pkl)

    print('PLOT HEATMAPS in ', output_path)
    plot_heatmaps_master(dimension=dimension, min=min_values, max=max_values)
Exemplo n.º 7
0
def main(args):
    """Merge per-chunk ranking pickles into one ranking file.

    Loads ``chunk_<i>.pkl_OUT.pkl`` for every ``i`` in
    ``range(args.n_chunks)`` from ``args.chunk_folder``, merges their
    key/value pairs into a single dictionary (on duplicate keys the later
    chunk wins) and saves it as ``merged_OUT.pkl`` in the same folder.

    Args:
        args: object exposing ``n_chunks`` (passed to ``range``) and
            ``chunk_folder`` (string path; an empty string is treated as the
            current directory).
    """
    n_chunks = args.n_chunks
    chunk_folder = args.chunk_folder
    # Guard on emptiness first: indexing an empty string raises IndexError,
    # and blindly appending '/' would redirect everything to the root.
    if chunk_folder and not chunk_folder.endswith('/'):
        chunk_folder += '/'

    # Group all chunk-level rankings into a single ranking dictionary.
    ranking = dict()
    for i in range(n_chunks):
        chunk_pathname = chunk_folder + 'chunk_' + str(i) + '.pkl_OUT.pkl'
        ranking.update(load_data(filename=chunk_pathname))

    final_pathname = chunk_folder + 'merged_OUT.pkl'
    save_data(ranking, filename=final_pathname)
Exemplo n.º 8
0
def main(args):
    """Split a pickled artist dictionary into three per-field pickles.

    Loads the artists from ``args.input_pkl`` and writes three dictionaries,
    all keyed like the input, into ``args.output_path``:

      * ``heatmaps.pkl``      -- each artist's ``tsne_heatmap``
      * ``names.pkl``         -- each artist's ``id``
      * ``ground_truth.pkl``  -- each artist's ``similar_artists``
    """
    input_path = args.input_pkl

    # Normalise the output directory so it always ends with a slash.
    out_dir = args.output_path
    if not out_dir[-1] == '/':
        out_dir = out_dir + '/'

    names_file = out_dir + 'names.pkl'
    heatmaps_file = out_dir + 'heatmaps.pkl'
    gt_file = out_dir + 'ground_truth.pkl'

    artists = load_data(filename=input_path)

    names = {key: artist.id for key, artist in artists.items()}
    heatmaps = {key: artist.tsne_heatmap for key, artist in artists.items()}
    ground_truth = {key: artist.similar_artists
                    for key, artist in artists.items()}

    save_data(filename=heatmaps_file, dict=heatmaps)
    save_data(filename=names_file, dict=names)
    save_data(filename=gt_file, dict=ground_truth)