def Augmented_Spectrograms(WIDTH, HEIGHT, DPI, SONG_DURATION, SR):
    """Create training spectrograms for the augmented clips in ./AugmentedData.

    Walks ``./AugmentedData`` recursively. Every file is expected to be named
    ``<track id>.<ext>``. For each file whose track id is marked "training"
    in the ``('set', 'split')`` column of the tracks metadata, the clip is
    handed to ``create_spectrogram`` under a fresh augmented id and an
    ``[augmented_id, genre]`` row is appended to ``./Data/train.csv``.

    Args:
        WIDTH, HEIGHT, DPI, SONG_DURATION, SR: forwarded unchanged to
            ``create_spectrogram``.

    Raises:
        ValueError: if a file name stem is not an integer track id.
    """
    directory = './AugmentedData'
    tracks = utils.load('./fma_metadata/tracks.csv')
    split_of = tracks['set', 'split']
    genre_of = tracks['track', 'genre_top']

    # First id handed out to augmented clips; it must not collide with any
    # id already used for the original tracks.
    augmented_id = 200000

    for subdir, _dirs, files in os.walk(directory):
        for file_name in files:
            track_id = int(os.path.splitext(file_name)[0])
            if split_of[track_id] != "training":
                continue

            full_path = os.path.join(subdir, file_name)
            new_row = [augmented_id, genre_of[track_id]]
            # Same argument order as the call in Spectrogram_Create:
            # SONG_DURATION comes before SR. It was previously omitted here,
            # which put SR in the duration position.
            create_spectrogram(full_path, str(augmented_id), "train",
                               WIDTH, HEIGHT, DPI, SONG_DURATION, SR)
            append_list_as_row('./Data/train.csv', new_row)
            augmented_id += 1
def mp3_Convert(num_files_to_convert):
    """Convert up to ``num_files_to_convert`` fma_small mp3 files to wav.

    For each track in the "small" subset of the tracks metadata, the mp3 is
    converted with ffmpeg into ``Samples/<genre>/<track id>.wav``; the result
    is then re-read with librosa (44.1 kHz, mono) and written back to the same
    path so that librosa's own defaults are applied to the final file.

    A track that fails at any stage is reported and skipped; it does not count
    towards ``num_files_to_convert``.

    Args:
        num_files_to_convert: maximum number of successfully converted files.
    """
    # Local import: the file's top-level import block is not visible here.
    import subprocess

    AUDIO_DIR = './fma_small/'
    tracks = utils.load('./fma_metadata/tracks.csv')

    try:
        os.mkdir('Samples')
    except FileExistsError:
        print("Sample Folder Exists. No Creation Necessary")

    small = tracks['set', 'subset'] <= 'small'
    y_small = tracks.loc[small, ('track', 'genre_top')]

    sr = 44100
    file_count = 0
    # ``Series.items`` replaces ``iteritems``, which pandas 2.0 removed.
    for track_id, genre in y_small.items():
        # Checked before doing any work so exactly ``num_files_to_convert``
        # files are produced (the old ``>`` test after the increment let one
        # extra file through).
        if file_count >= num_files_to_convert:
            break

        os.makedirs('Samples/' + genre, exist_ok=True)

        mp3_filename = utils.get_audio_path(AUDIO_DIR, track_id)
        out_wav_filename = 'Samples/' + genre + '/' + str(track_id) + '.wav'
        # ffmpeg writes the intermediate wav to the same path librosa
        # later overwrites.
        in_wav_filename = out_wav_filename

        # Argument list instead of a shell string, so paths containing spaces
        # or shell metacharacters are passed through untouched. ``-y``
        # overwrites a stale output so re-runs do not block on ffmpeg's
        # interactive overwrite prompt.
        cmd = ['ffmpeg', '-hide_banner', '-loglevel', 'panic', '-y',
               '-i', mp3_filename, in_wav_filename]
        print("excuting conversion: " + ' '.join(cmd))
        try:
            subprocess.run(cmd, check=True)
        except (OSError, subprocess.CalledProcessError):
            print("Could not find file:" + mp3_filename)
            continue

        # ffmpeg could do the full conversion on its own, but reading and
        # writing through librosa applies librosa's defaults.
        print("reading ", in_wav_filename)
        try:
            data, sr = librosa.load(in_wav_filename, sr=sr, mono=True)
        except Exception:
            print("Failed reading the converted in_wav:" + in_wav_filename)
            continue

        print("writing ", out_wav_filename)
        try:
            # NOTE(review): ``librosa.output`` was removed in librosa 0.8;
            # on newer versions this raises AttributeError and every file is
            # reported as a write failure. Confirm the pinned version.
            librosa.output.write_wav(out_wav_filename, data, sr=sr)
        except Exception:
            print("failed writing the output_wav:" + out_wav_filename)
            continue

        file_count += 1

    print(".mp3 file conversion to .wav complete")
def Generate_CSV():
    """Write Data/train.csv, Data/validate.csv and Data/test.csv.

    Collects the file-name stems of everything found under ``./Data`` and,
    for each track id in the tracks metadata whose string form matches one of
    those stems, emits an ``ID,Genre`` row into the CSV of its split:
    "test" -> test.csv, "training" -> train.csv, anything else ->
    validate.csv. The CSVs are written without header or index column.
    """
    print("Begin CSV generation")
    tracks = utils.load('./fma_metadata/tracks.csv')

    try:
        os.mkdir("Data")
    except FileExistsError:
        print("Data folder exists, skip creation")

    # Only ids for which a file already exists under ./Data get a CSV row.
    # A set keeps the per-track membership test O(1).
    available_ids = {
        os.path.splitext(file_name)[0]
        for _subdir, _dirs, files in os.walk('./Data')
        for file_name in files
    }

    split_of = tracks['set', 'split']
    genre_of = tracks['track', 'genre_top']

    # Rows are gathered in plain lists and turned into frames once at the
    # end: ``DataFrame.append`` was removed in pandas 2.0 and copied the
    # whole frame on every call.
    train_rows = []
    validation_rows = []
    test_rows = []
    for track_id in split_of.index:
        if str(track_id) not in available_ids:
            continue

        row = (track_id, genre_of[track_id])
        split = split_of[track_id]
        if split == "test":
            print("Adding Testing Data", track_id)
            test_rows.append(row)
        elif split == "training":
            print("Adding Training Data", track_id)
            train_rows.append(row)
        else:
            print("Adding Validation Data", track_id)
            validation_rows.append(row)

    outputs = (
        ('Data/train.csv', train_rows),
        ('Data/validate.csv', validation_rows),
        ('Data/test.csv', test_rows),
    )
    for csv_path, rows in outputs:
        frame = pd.DataFrame(rows, columns=['ID', 'Genre'])
        frame.to_csv(csv_path, encoding='utf-8', index=False, header=False)

    print("Completed CSV generation, check the ./Data folder for your .csv")
def Tempo_Change_Convert(tempo):
    """Write a time-stretched copy of every training clip under ./Samples.

    Walks ``./Samples`` recursively. Every file is expected to be named
    ``<track id>.<ext>``. Files whose track id is marked "training" in the
    tracks metadata are loaded with librosa, time-stretched by ``tempo`` and
    written to ``./AugmentedData/Tempo/<track id>.wav``.

    Args:
        tempo: passed to ``librosa.effects.time_stretch`` as ``rate``.

    Raises:
        ValueError: if a file name stem is not an integer track id.
    """
    AUDIO_DIR = './Samples/'
    tracks = utils.load('./fma_metadata/tracks.csv')

    # makedirs also creates ./AugmentedData when it is missing; a plain
    # mkdir failed in that case and was misreported as "Folder Exists".
    try:
        os.makedirs('./AugmentedData/Tempo')
    except FileExistsError:
        print("Tempo Folder Exists. No Creation Necessary")

    split_of = tracks['set', 'split']
    for subdir, _dirs, files in os.walk(AUDIO_DIR):
        for file_name in files:
            track_id = os.path.splitext(file_name)[0]
            if split_of[int(track_id)] != "training":
                continue

            y, sr = librosa.load(os.path.join(subdir, file_name))
            # ``rate`` by keyword: it is keyword-only from librosa 0.10 on
            # and accepted by keyword in earlier versions too.
            y_speed = librosa.effects.time_stretch(y, rate=tempo)
            out_wav_filename = './AugmentedData/Tempo/' + track_id + '.wav'
            # NOTE(review): ``librosa.output`` was removed in librosa 0.8;
            # confirm the pinned librosa version still provides it.
            librosa.output.write_wav(out_wav_filename, y_speed, sr=sr)
def Pitch_Shift_Convert(pitch):
    """Write a pitch-shifted copy of every training clip under ./Samples.

    Walks ``./Samples`` recursively. Every file is expected to be named
    ``<track id>.<ext>``. Files whose track id is marked "training" in the
    tracks metadata are loaded with librosa, pitch-shifted by ``pitch`` steps
    and written to ``./AugmentedData/Pitch/<track id>.wav``.

    Args:
        pitch: passed to ``librosa.effects.pitch_shift`` as ``n_steps``.

    Raises:
        ValueError: if a file name stem is not an integer track id.
    """
    AUDIO_DIR = './Samples/'
    tracks = utils.load('./fma_metadata/tracks.csv')

    # makedirs also creates ./AugmentedData when it is missing; a plain
    # mkdir failed in that case and was misreported as "Folder Exists".
    try:
        os.makedirs('./AugmentedData/Pitch')
    except FileExistsError:
        print("Pitch Folder Exists. No Creation Necessary")

    split_of = tracks['set', 'split']
    for subdir, _dirs, files in os.walk(AUDIO_DIR):
        for file_name in files:
            track_id = os.path.splitext(file_name)[0]
            if split_of[int(track_id)] != "training":
                continue

            y, sr = librosa.load(os.path.join(subdir, file_name))
            # ``sr`` by keyword: it is keyword-only from librosa 0.10 on and
            # accepted by keyword in earlier versions too.
            y_one = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch)
            out_wav_filename = './AugmentedData/Pitch/' + track_id + '.wav'
            # NOTE(review): ``librosa.output`` was removed in librosa 0.8;
            # confirm the pinned librosa version still provides it.
            librosa.output.write_wav(out_wav_filename, y_one, sr=sr)
def Spectrogram_Create(WIDTH, HEIGHT, DPI, SONG_DURATION, SR):
    """Create train/validate/test spectrograms for every clip under ./Samples.

    Ensures ``Data``, ``Data/Train``, ``Data/Test`` and ``Data/Validate``
    exist, then walks the sub-directories of ``./Samples`` (one per genre).
    Every file is expected to be named ``<track id>.<ext>``; it is handed to
    ``create_spectrogram`` with the target set chosen from the track's
    ``('set', 'split')`` value: "test" -> "test", "training" -> "train",
    anything else -> "validate".

    Args:
        WIDTH, HEIGHT, DPI, SONG_DURATION, SR: forwarded unchanged to
            ``create_spectrogram``.

    Raises:
        ValueError: if a file name stem is not an integer track id.
    """
    tracks = utils.load('./fma_metadata/tracks.csv')

    # Output folders for the train / validation / test spectrograms. Only
    # "already exists" is tolerated; any other mkdir failure propagates.
    for folder in ("Data", "Data/Train", "Data/Test", "Data/Validate"):
        try:
            os.mkdir(folder)
        except FileExistsError:
            print("{} folder exists, skip creation".format(folder))

    split_of = tracks['set', 'split']
    directory = './Samples'
    for subdir, _dirs, files in os.walk(directory):
        if subdir == directory:
            continue

        # First path component below ./Samples. Derived with relpath so it
        # does not depend on how many separators the walk path contains
        # (indexing ``subdir.split(os.sep)[2]`` breaks on Windows).
        genre = os.path.relpath(subdir, directory).split(os.sep)[0]
        print("Begin creating spectrograms for {} genre".format(genre))

        for file_name in files:
            full_path = os.path.join(subdir, file_name)
            track_id = os.path.splitext(file_name)[0]

            split = split_of[int(track_id)]
            if split == "test":
                target_set = "test"
            elif split == "training":
                target_set = "train"
            else:
                target_set = "validate"
            create_spectrogram(full_path, track_id, target_set,
                               WIDTH, HEIGHT, DPI, SONG_DURATION, SR)

        print("Finished creating spectrograms for {} genre".format(genre))
        gc.collect()

    # Disabled: organized storage of spectrograms, one folder per genre.
    # Not used for training or testing, only to categorize the images.
    #
    # try:
    #     os.mkdir('Spectrogram')
    # except:
    #     print("Spectrogram folder exists, skip creation")
    # for subdir, dirs, files in os.walk(directory):
    #     if(subdir != directory):
    #         try:
    #             path_list = subdir.split(os.sep)
    #             os.mkdir("Spectrogram/" + path_list[2])
    #         except:
    #             print("{} folder already exists, skip creation".format(path_list[2]) )
    #         print("Begin creating spectrograms for {} genre".format(path_list[2]))
    #         for file in files:
    #             full_path = os.path.join(subdir, file)
    #             name = os.path.splitext(os.path.basename(file))
    #             id = name[0]
    #             create_spectrogram(full_path, id, "general")
    #         print("Finished creating spectrograms for {} genre".format(path_list[2]))