def main(args):
    """Rank a single chunk and pickle the result into the output folder.

    Reads three attributes from ``args``: ``i_path`` (artists pickle),
    ``i_chunk`` (chunk pickle) and ``output_path`` (destination folder).

    The loaded objects are published through the module-level globals
    ``artists`` and ``chunk`` before ``compute_ranking_master()`` runs;
    presumably that helper reads them -- confirm against its definition.

    The ranking is written to ``<output folder>/<chunk basename>_OUT.pkl``.
    The global ``output_path`` holds the (slash-terminated) folder while the
    ranking is computed and the full output file path afterwards.
    """
    global output_path, artists, chunk

    artists_file = args.i_path
    chunk_file = args.i_chunk

    # Make sure the destination folder ends with a slash.
    output_path = args.output_path
    output_path = output_path if output_path[-1] == '/' else output_path + '/'

    print('LOADING PKL...', end='')
    artists = load_data(filename=artists_file)
    print('DONE')

    print('LOADING CHUNKS...')
    chunk = load_data(filename=chunk_file)
    print('DONE')

    print('COMPUTE RANKING of selection ', chunk_file)
    ranking = compute_ranking_master()

    # From here on the global points at the output file, not the folder.
    output_path = output_path + os.path.basename(chunk_file) + '_OUT.pkl'
    save_data(ranking, filename=output_path)
def main(args):
    """Merge per-chunk output pickles into one DataFrame and save it.

    Reads ``args.n_chunks`` and ``args.chunk_folder``. For every index ``i``
    in ``range(n_chunks)`` the file ``<chunk_folder>/chunk_<i>_OUT.pkl`` is
    loaded and its items are copied into one dictionary (later chunks
    overwrite earlier ones on key collisions). The dictionary is converted
    with ``pd.DataFrame.from_dict`` and saved as
    ``<chunk_folder>/merged_OUT.pkl``.

    Memory usage is printed along the way.
    NOTE(review): the messages say "GB" and divide by 2**20, which is only
    right if ``getCurrentMemoryUsage()`` reports KiB -- confirm.
    """
    n_chunks = args.n_chunks
    chunk_folder = args.chunk_folder
    if chunk_folder[-1] != '/':
        chunk_folder += '/'

    # Group all chunk-level rankings in a single ranking file.
    dictionary = dict()
    for i in range(n_chunks):
        chunk_filename = 'chunk_' + str(i) + '_OUT.pkl'
        chunk_pathname = chunk_folder + chunk_filename
        chunk_out = load_data(filename=chunk_pathname)
        dictionary.update(chunk_out.items())
        # Drop the reference so the chunk can be reclaimed before the next load.
        del chunk_out
        print('chunk ', str(i), 'Memory (GB) : ', getCurrentMemoryUsage() / (2**20))

    final_pathname = chunk_folder + 'merged_OUT.pkl'

    print('before gc Memory (GB) : ', getCurrentMemoryUsage() / (2**20))
    gc.collect()
    print('after gc Memory (GB) : ', getCurrentMemoryUsage() / (2**20))

    df = pd.DataFrame.from_dict(dictionary)
    save_data(dict=df, filename=final_pathname)

    # The original reused the loop variable here, which is unbound when no
    # chunk was processed. Report the last processed index only if one
    # exists; the output is unchanged whenever n_chunks > 0.
    if n_chunks > 0:
        print('chunk ', str(n_chunks - 1), 'Memory (GB) : ', getCurrentMemoryUsage() / (2**20))
def main(args):
    """Build the max-length ranking from a distances pickle and save it.

    Reads ``args.distances`` (path of the distances pickle) and ``args.note``
    (string inserted into the output file name). The result of
    ``build_max_length_ranking`` is saved next to the input file as
    ``max_length_ranking_<note>.pkl``.
    """
    source_file = args.distances
    suffix = args.note

    ranking = build_max_length_ranking(distances=load_data(filename=source_file))

    # The output lives in the same directory as the distances file.
    target_dir = os.path.dirname(source_file)
    target_name = 'max_length_ranking_' + suffix + '.pkl'
    save_data(filename=os.path.join(target_dir, target_name), dict=ranking)
def main(args):
    """Collect the artist dictionary from a folder and save it to disk.

    Reads ``args.i_path`` (input folder passed to ``retrieve_artist_dict``),
    ``args.o_path`` (output folder) and ``args.o_name`` (output file name).
    The output file is ``<o_path>/<o_name>``; a slash is inserted only when
    ``o_path`` does not already end with one.
    """
    source_dir = args.i_path

    target_dir = args.o_path
    separator = '' if target_dir[-1] == '/' else '/'
    output_filename = target_dir + separator + args.o_name

    save_data(dict=retrieve_artist_dict(basedir=source_dir), filename=output_filename)
    return
def main(args):
    """Build the matrix for one chunk and save it.

    Reads ``args.input_pkl``, ``args.output_path``, ``args.metric`` and
    ``args.input_chunk``. The metric and the loaded artists are published
    through the module-level globals ``metric`` and ``artists``; presumably
    ``build_matrix_master`` reads them -- confirm against its definition.
    The value returned by ``build_matrix_master(chunk=...)`` is saved to
    ``args.output_path``.
    """
    global metric, artists

    source_pkl = args.input_pkl
    destination = args.output_path
    metric = args.metric
    artists = load_data(source_pkl)

    chunk_file = args.input_chunk
    print('LOADING CHUNKS...')
    selection = load_data(filename=chunk_file)
    print('DONE')

    save_data(filename=destination, dict=build_matrix_master(chunk=selection))
def main(args):
    """Run the t-SNE / heatmap pipeline over the artists pickle.

    Reads from ``args``: ``i_path`` (artists pickle to load), ``threshold``
    (passed to ``remove_outlier``), ``output_pkl`` (where the processed
    artists are saved), ``o_path`` (folder reported as the heatmap plot
    destination) and ``mode`` (passed to ``gen_dataset``; also a key into
    the module-level mapping ``d``, which is defined outside this block).

    Steps, in order: load artists, build and clean the dataset, normalise,
    run t-SNE, attach the embedding to the artists, print per-column
    statistics of the embedding, clean similar artists, generate heatmaps,
    save the artists, plot heatmaps.

    ``artists`` and ``output_path`` are module-level globals; presumably the
    ``*_master`` helpers read them -- confirm against their definitions.
    """
    global output_path, artists

    input_folder = args.i_path
    threshold = args.threshold
    output_pkl = args.output_pkl

    output_path = args.o_path
    if output_path[-1] != '/':
        output_path += '/'

    mode = args.mode

    print('LOADING PKL...')
    artists = load_data(filename=input_folder)

    print('PREPROCESSING ', d[mode])
    X, y = gen_dataset(artists=artists, mode=mode)
    X, y = remove_outlier(X=X, y=y, thresh=threshold)
    X = normalize(X=X)

    print('TSNE')
    X = tsne(X=X, lr=1000)

    artists = optimize_artists_dictionary(artists)
    artists = attach_tsne_to_art_dict(artists=artists, X=X, y=y)

    # Column-wise bounds of the embedding. Named x_min / x_max so the
    # builtins min() and max() are not shadowed inside this function.
    x_min = np.amin(X, axis=0)
    x_max = np.amax(X, axis=0)
    dimension = 20

    print('[TSNE-1 - TSNE-2]')
    print('min values')
    print(x_min)
    print('max values')
    print(x_max)
    print('mean values')
    print(np.mean(X, axis=0))
    print('variance values')
    print(np.var(X, axis=0))

    artists = clean_similar_artists(artists=artists)

    print('GENERATE HEATMAPS')
    gen_heatmaps_master(dimension=dimension, min=x_min, max=x_max)

    print('SAVING DATA')
    save_data(artists, filename=output_pkl)

    print('PLOT HEATMAPS in ', output_path)
    plot_heatmaps_master(dimension=dimension, min=x_min, max=x_max)
def main(args):
    """Merge per-chunk ranking pickles into one dictionary and save it.

    Reads ``args.n_chunks`` and ``args.chunk_folder``. For every index ``i``
    in ``range(n_chunks)`` the file ``<chunk_folder>/chunk_<i>.pkl_OUT.pkl``
    is loaded and its items are copied into a single dictionary (later
    chunks overwrite earlier ones on key collisions). The merged dictionary
    is saved as ``<chunk_folder>/merged_OUT.pkl``.
    """
    total = args.n_chunks

    folder = args.chunk_folder
    if folder[-1] != '/':
        folder = folder + '/'

    # Group all chunk-level rankings in a single ranking file.
    merged = {}
    for index in range(total):
        part_path = folder + 'chunk_{}.pkl_OUT.pkl'.format(index)
        part = load_data(filename=part_path)
        merged.update(part.items())

    save_data(merged, filename=folder + 'merged_OUT.pkl')
def main(args):
    """Split the artists pickle into names, heatmaps and ground-truth files.

    Reads ``args.input_pkl`` (artists pickle) and ``args.output_path``
    (destination folder). For every ``(id_, artist)`` item of the loaded
    mapping, three per-id dictionaries are built from ``artist.id``,
    ``artist.tsne_heatmap`` and ``artist.similar_artists``. They are saved
    as ``heatmaps.pkl``, ``names.pkl`` and ``ground_truth.pkl`` (in that
    order) inside the destination folder.
    """
    input_path = args.input_pkl

    folder = args.output_path
    if folder[-1] != '/':
        folder = folder + '/'

    artists = load_data(filename=input_path)

    names = {id_: artist.id for id_, artist in artists.items()}
    heatmaps = {id_: artist.tsne_heatmap for id_, artist in artists.items()}
    ground_truth = {id_: artist.similar_artists for id_, artist in artists.items()}

    # Same write order as before: heatmaps, names, ground truth.
    outputs = (
        ('heatmaps.pkl', heatmaps),
        ('names.pkl', names),
        ('ground_truth.pkl', ground_truth),
    )
    for basename, payload in outputs:
        save_data(filename=folder + basename, dict=payload)