def extract_features(tracks_dir="tracks/", feat_dir="features/"):
    """Extract Marsyas features for every file found in ``tracks_dir``.

    For each track a one-line ``.mf`` collection file is written in the
    current working directory, ``bextract`` is run on it, and the produced
    ``.arff`` file is moved into the ``svmbff`` sub-directory of ``feat_dir``.
    The temporary ``.mf`` file is removed afterwards.

    :param tracks_dir: directory holding the audio tracks
    :param feat_dir: root directory of the features (created if needed)
    """
    # Local import: the file-level import block is not visible from here.
    import subprocess

    utils.print_success("Extracting features")
    tracks_fn = os.listdir(tracks_dir)
    utils.create_dir(feat_dir)
    feat_dir = utils.create_dir(feat_dir + "svmbff")
    # Argument list instead of a shell string so that file names containing
    # spaces or shell metacharacters are passed to bextract untouched.
    bextract = [
        "bextract", "-mfcc", "-zcrs", "-ctd", "-rlf", "-flx",
        "-ws", "1024", "-as", "898", "-sv", "-fe",
    ]
    nb_tracks = str(len(tracks_fn))
    with open(os.devnull, "w") as devnull:
        for index, filename in enumerate(tracks_fn):
            utils.print_progress_start(
                str(index) + "/" + nb_tracks + " " + filename)
            track_path = filename + ".mf"
            with open(track_path, "w") as filep:
                filep.write(os.path.join(tracks_dir, filename) + "\n")
            new_fn = filename.split(".")[0] + ".arff"
            try:
                subprocess.call(bextract + [track_path, "-w", new_fn],
                                stdout=devnull, stderr=devnull)
            except OSError:
                # Raised when the bextract executable cannot be launched.
                # (os.system never raised, so these tips were unreachable.)
                utils.print_info(
                    "You have to make marsyas available systemwide, tips:")
                utils.print_info(
                    "http://marsyas.info/doc/manual/marsyas-user/Step_002dby_002dstep-building-instructions.html#Step_002dby_002dstep-building-instructions"
                )
                utils.print_info("http://stackoverflow.com/a/21173918")
                utils.print_error("Program exit")
            os.rename(new_fn, os.path.join(feat_dir, new_fn))
            os.remove(track_path)
    utils.print_progress_end()
    # Leftover collection file; only remove it when it is actually there.
    if os.path.exists("bextract_single.mf"):
        os.remove("bextract_single.mf")
def yaafe_feat_extraction(dir_tracks):
    """Extract MFCCs with YAAFE for every file found in ``dir_tracks``.

    Equivalent command line, run from inside ``dir_tracks``::

        yaafe -r 22050 -f "mfcc: MFCC blockSize=2048 stepSize=1024" \
              --resample -b <features/database1> <track>

    :param dir_tracks: directory holding the audio tracks
    """
    # Local import: the file-level import block is not visible from here.
    import subprocess

    utils.print_success("YAAFE features extraction (approx. 8 minutes)")

    # Assert Python version
    if sys.version_info.major != 2:
        utils.print_error("Yaafe needs Python 2 environment")

    # Assert folder exists
    dir_tracks = utils.abs_path_dir(dir_tracks)
    filelist = os.listdir(dir_tracks)
    dir_feat = utils.create_dir(utils.create_dir("features") + "database1")

    # The output directory is made absolute because yaafe runs with
    # dir_tracks as its working directory; a relative path would otherwise
    # be resolved against the tracks folder instead of the folder created
    # just above.
    yaafe_cmd = [
        "yaafe", "-r", "22050",
        "-f", "mfcc: MFCC blockSize=2048 stepSize=1024",
        "--resample", "-b", os.path.abspath(dir_feat),
    ]
    nb_files = str(len(filelist))
    try:
        with open(os.devnull, "w") as devnull:
            for index, filen in enumerate(filelist):
                utils.print_progress_start(
                    str(index + 1) + "/" + nb_files + " " + filen)
                # cwd= replaces os.chdir(): the caller's working directory
                # is never modified, even if an exception is raised.
                subprocess.call(yaafe_cmd + [filen], cwd=dir_tracks,
                                stdout=devnull, stderr=devnull)
    except OSError as err:
        # The yaafe executable (or the working directory) is unavailable.
        utils.print_error("Unable to run yaafe: " + str(err))
    utils.print_progress_end()
def run_kea_on_folds(folds_dir):
    """Run kea on the train/test ARFF files stored in ``folds_dir``.

    If ``train_test.arff`` exists it is used both as train and test set and
    the results go to ``results.txt``. Otherwise every
    ``train_NN.arff`` / ``test_NN.arff`` pair is processed in turn and the
    results of fold ``NN`` go to ``results_NN.arff``.

    :param folds_dir: directory containing the ARFF folds
    """
    folds_dir = utils.abs_path_dir(folds_dir)
    single_file = folds_dir + "/train_test.arff"
    if os.path.exists(single_file):
        run_kea(single_file, single_file, folds_dir + "/results.txt")
    else:
        # Count the train files only: result files written into this same
        # folder by a previous run must not inflate the number of folds.
        nb_folds = len([
            name for name in os.listdir(folds_dir)
            if name.startswith("train_") and name.endswith(".arff")
            and os.path.isfile(os.path.join(folds_dir, name))
        ])
        # Run on multiple train/test
        for index in range(1, nb_folds + 1):
            fold_id = str(index).zfill(2)
            utils.print_progress_start("Train/Test on fold " + str(index))
            train_file = folds_dir + "/train_" + fold_id + ".arff"
            test_file = folds_dir + "/test_" + fold_id + ".arff"
            out_file = folds_dir + "/results_" + fold_id + ".arff"
            run_kea(train_file, test_file, out_file)
        utils.print_progress_end()
        utils.print_warning("TODO multiprocessing")
def preprocess_features(folder):
    """Run ``convert_feats_files`` on every entry of ``folder``.

    :param folder: directory whose entries are converted one by one
    """
    utils.print_success("Preprocessing train set")
    folder = utils.abs_path_dir(folder)
    entries = os.listdir(folder)
    total = len(entries)
    position = 0
    while position < total:
        entry = entries[position]
        utils.print_progress_start(
            "{0}/{1} {2}".format(position, total, entry))
        convert_feats_files(folder + entry)
        position += 1
    utils.print_progress_end()
def generate_singing_voice_track(paths):
    """Generate a singing-voice track for each ``*-ld.wav`` mix.

    For every file ending in ``-ld.wav`` in each folder of ``paths``, the
    file with the same name minus ``-ld`` is read as the instrumental and
    subtracted sample-wise from the mix. The difference is written next to
    the mix, with ``nbv-ld`` replaced by ``sv`` in the file name, and the
    path of the mix is appended to ``available.txt``.

    :param paths: sequence of folder paths to scan
    :return: name of the file listing the processed mixes
    """
    utils.print_success("Generating singing voice tracks")
    nb_paths = str(len(paths))
    for index, folder in enumerate(paths):
        utils.print_progress_start(str(index) + "/" + nb_paths + " " + folder)
        if not os.path.isdir(folder):
            continue
        for filen in os.listdir(folder):
            if "-bv." in filen:
                utils.print_error("Backing vocals file found in " + filen)
            if not filen.endswith('-ld.wav'):
                continue

            voice_fn = filen.replace("nbv-ld", "sv")
            if voice_fn == filen:
                # Without the "nbv-ld" marker the output name equals the
                # input name and writing would destroy the source mix.
                utils.print_warning(
                    "Skipping " + filen + ": output would overwrite input")
                continue

            song_path = folder + "/" + filen
            instru_path = folder + "/" + filen.replace("-ld", "")
            # Both reads are attempted so that each unreadable file is
            # reported, even when the other one already failed.
            nb_error = 0
            try:
                song_samples, song_fs = sf.read(song_path)
            except RuntimeError as run_err:
                error("RuntimeError", str(run_err))
                nb_error += 1
            try:
                instru_samples, instru_fs = sf.read(instru_path)
            except RuntimeError as run_err:
                error("RuntimeError", str(run_err))
                nb_error += 1
            if nb_error:
                continue

            if song_fs != instru_fs:
                error("SamplingFreq", filen)
            elif instru_samples.shape != song_samples.shape:
                # Shape (not just length) is compared so that a channel
                # count mismatch is reported instead of breaking the
                # subtraction below.
                error("SampleSize", filen)
            else:
                sf.write(folder + "/" + voice_fn,
                         song_samples - instru_samples, song_fs)
                with open("available.txt", "a") as filep:
                    filep.write(song_path + "\n")
    utils.print_progress_end()
    return "available.txt"
def _read_segment_groundtruths(path):
    """Parse a space-separated annotation file into ``[end, tag]`` pairs.

    The second field of each row is the segment end time; the third field
    selects the tag: ``",i\\n"`` when it contains ``no``, ``",s\\n"``
    otherwise.
    """
    segments = []
    with open(path, "r") as filep:
        for row in filep:
            if not row.strip():
                continue  # tolerate blank lines
            fields = row.split(" ")
            tag = ",i\n" if "no" in fields[2] else ",s\n"
            segments.append([float(fields[1]), tag])
    return segments


def match_feat_with_song_gt(dir_feat, dir_gts):
    """Append the frame-level song/instru tag to each local feature row.

    Uses the groundtruths created by
    http://www.mathieuramona.com/wp/data/jamendo/
    Definition of YAAFE features:
    http://yaafe.sourceforge.net/features.html

    For every annotation file in ``dir_gts`` the feature file
    ``<basename>.wav.mfcc.csv`` in ``dir_feat`` is rewritten in place: each
    row receives the tag of the segment its frame falls in. Rows located
    after the end of the last annotated segment are dropped.

    :param dir_feat: directory holding the local feature csv files
    :param dir_gts: directory holding the segment annotation files
    """
    utils.print_success("Matching local feat to song/instru groundtruths")
    dir_feat = utils.abs_path_dir(dir_feat)
    dir_gts = utils.abs_path_dir(dir_gts)
    block_size = 1024.
    fech = 22050.
    # Duration of one frame, in seconds (annotation times are compared
    # against frame_index * frame_duration).
    frame_duration = block_size / fech
    filenames = os.listdir(dir_gts)
    nb_files = str(len(filenames))
    for index, filename in enumerate(filenames):
        utils.print_progress_start(
            str(index) + "/" + nb_files + " " + filename)
        groundtruths = _read_segment_groundtruths(dir_gts + filename)
        gt_len = len(groundtruths)
        gt_index = 0

        feat_path = dir_feat + filename.split(".")[0] + ".wav.mfcc.csv"
        tagged_rows = []
        with open(feat_path, "r") as filep:
            for frame_index, line in enumerate(filep):
                if gt_index >= gt_len:
                    break  # past the last annotated segment
                # At most one segment is skipped per frame.
                if frame_duration * frame_index > groundtruths[gt_index][0]:
                    gt_index += 1
                    if gt_index >= gt_len:
                        break
                tagged_rows.append(
                    line.rstrip("\r\n") + groundtruths[gt_index][1])
        with open(feat_path, "w") as filep:
            filep.write("".join(tagged_rows))
    utils.print_progress_end()
def merge_arff(indir, outfilename):
    """Merge the per-track ARFF files of ``indir`` into one file.

    The bextract program from Marsyas generates one output file per audio
    file; this function merges them all into a unique file. Each file is
    checked with ``validate_arff`` and skipped when it is not valid. The
    first 74 lines of the first valid file are used as header, then lines
    75 to 77 of every valid file are appended.

    :param indir: directory holding the ARFF files
    :param outfilename: path of the merged file to write
    :return: ``outfilename``
    """
    from itertools import islice

    utils.print_success("Preprocessing ARFFs")
    indir = utils.abs_path_dir(indir)
    header_len = 74
    data_len = 3
    out_path = os.path.abspath(outfilename)

    # Validate every file exactly once so invalid files are counted once.
    valid_fns = []
    cpt_invalid_fn = 0
    for filename in os.listdir(indir):
        path = indir + filename
        if not os.path.isfile(path):
            continue
        if os.path.abspath(path) == out_path:
            continue  # never merge a previous output into itself
        new_fn = validate_arff(path)
        if new_fn:
            valid_fns.append(new_fn)
        else:
            cpt_invalid_fn += 1

    with open(outfilename, 'w') as outfn:
        # Write first lines of ARFF template file
        if valid_fns:
            with open(valid_fns[0], 'r') as template:
                outfn.writelines(islice(template, header_len))
        # Append all arff file to the output file
        for cur_file_num, new_fn in enumerate(valid_fns, 1):
            utils.print_progress_start(
                "Analysing file\t" + str(cur_file_num))
            with open(new_fn, 'r') as fname:
                outfn.writelines(
                    islice(fname, header_len, header_len + data_len))
    utils.print_progress_end()

    if cpt_invalid_fn:
        utils.print_warning(
            str(cpt_invalid_fn) + " ARFF files with errors found")
    return outfilename
def extract_features(dir_audio, dir_feat):
    """Run ``utils.yaafe`` and ``essentia`` on the audio under ``dir_audio``.

    Regular files directly inside ``dir_audio`` are all processed; for any
    other entry, its content is listed and only the names containing
    ``ld.wav`` are kept.

    :param dir_audio: directory holding audio files and/or sub-folders
    :param dir_feat: directory passed to ``essentia`` together with each file
    """
    dir_audio = utils.abs_path_dir(dir_audio)
    dir_feat = utils.abs_path_dir(dir_feat)

    audio_paths = []
    for entry in os.listdir(dir_audio):
        entry_path = dir_audio + entry
        if os.path.isfile(entry_path):
            audio_paths.append(entry_path)
            continue
        audio_paths.extend(
            entry_path + "/" + name
            for name in os.listdir(entry_path)
            if "ld.wav" in name
        )

    # marsyas(dir_feat, filelist)
    total = len(audio_paths)
    for position, audio_path in enumerate(audio_paths, 1):
        short_name = audio_path.split(os.sep)[-1]
        utils.print_progress_start(
            "{0}/{1} {2}".format(position, total, short_name))
        utils.yaafe(audio_path)
        essentia(dir_feat, audio_path)
    utils.print_progress_end()
import os
import subprocess
import sys

sys.path.insert(0, './src/')
import utils

# Stand-alone script: extract MFCCs with YAAFE for every file in "tracks/".
dir_tracks = "tracks/"
utils.print_success("YAAFE features extraction (approx. 8 minutes)")

# Assert Python version
if sys.version_info.major != 2:
    utils.print_error("Yaafe needs Python 2 environment")

# Assert folder exists
dir_tracks = utils.abs_path_dir(dir_tracks)
filelist = os.listdir(dir_tracks)
dir_feat = utils.create_dir(utils.create_dir("features") + "database1")

# The output directory is made absolute because yaafe runs with dir_tracks
# as its working directory; a relative path would otherwise be resolved
# against the tracks folder instead of the folder created just above.
yaafe_cmd = [
    "yaafe", "-r", "22050",
    "-f", "mfcc: MFCC blockSize=2048 stepSize=1024",
    "--resample", "-b", os.path.abspath(dir_feat),
]
try:
    with open(os.devnull, "w") as devnull:
        for index, filen in enumerate(filelist):
            utils.print_progress_start(
                str(index + 1) + "/" + str(len(filelist)) + " " + filen)
            # cwd= replaces os.chdir(): the working directory of this
            # process is never modified, even if an exception is raised.
            # An argument list keeps file names with spaces intact.
            subprocess.call(yaafe_cmd + [filen], cwd=dir_tracks,
                            stdout=devnull, stderr=devnull)
except OSError as err:
    # The yaafe executable (or the working directory) is unavailable.
    utils.print_error("Unable to run yaafe: " + str(err))
utils.print_progress_end()
def create_track_feat_testset(folder, infile, outfile, model_file, train=False):
    """Append one track-level feature row per track to ``outfile``.

    For each track listed in ``infile``:
    - read its MFCC file from ``folder``
    - compute delta and double-delta of the MFCCs in memory
    - run ``predict_proba`` and ``predict`` of the model on the frames
    - build the histogram and the song/instru ngrams of the predictions
    - append the means of MFCCs + deltas, the histogram, the ngrams and the
      track groundtruth as one csv line in ``outfile``

    :param folder: directory holding the MFCC csv files
    :param infile: csv file mapping track name to groundtruth
    :param outfile: csv file the rows are appended to
    :param model_file: path of the model loaded with ``joblib.load``
    :param train: selects file suffix, column separator and header skipping
    """
    utils.print_success("Create track feat testset")
    folder = utils.abs_path_dir(folder)
    infile = utils.abs_path_file(infile)
    clf = joblib.load(model_file)
    track_gts = read_gts(infile, separator=",")

    # NOTE(review): reconstructed on the assumption that ".wav.mfcc.csv" is
    # appended in both modes -- confirm against the train file naming.
    if train:
        suffix = ".wav.mfcc.csv"
        separator = ","
        nb_header_lines = 5
    else:
        suffix = "_audio_full_mono_22k" + ".wav.mfcc.csv"
        separator = " "
        nb_header_lines = 0

    nb_hist_class = 10
    ngram_threshold = 0.5
    total = len(track_gts)
    for position, filename in enumerate(track_gts, 1):
        utils.print_progress_start(
            "{0}/{1} {2}".format(position, total, filename))

        rows = []
        with open(folder + filename + suffix, "r") as filep:
            for _ in range(nb_header_lines):
                next(filep)
            for line in filep:
                # The last column is dropped before conversion.
                rows.append(str2arr(line.split(separator)[:-1]))
        mfccs = np.array(rows)

        # NOTE(review): librosa.feature.delta differentiates along the last
        # axis by default; with one row per frame that is the coefficient
        # axis -- confirm this matches how the model was trained.
        delta_mfcc = librosa.feature.delta(mfccs)
        delta2_mfcc = librosa.feature.delta(mfccs, order=2)
        features = np.concatenate((mfccs, delta_mfcc, delta2_mfcc), axis=1)

        preds_proba = clf.predict_proba(features)

        # Histogramm
        numbers = column(preds_proba, 0)
        counts = np.histogram(numbers, nb_hist_class)[0]
        hist_pred_norm = counts / float(sum(counts))

        song_ngram_proba = ngram_proba(local_pred=numbers,
                                       threshold=ngram_threshold,
                                       above_threshold=True)
        instru_ngram_proba = ngram_proba(local_pred=numbers,
                                         threshold=ngram_threshold,
                                         above_threshold=False)

        preds = clf.predict(features)
        song_ngram = ngram(preds, "s")
        instru_ngram = ngram(preds, "i")

        row_fields = [
            filename[:12],
            arr2str(np.mean(mfccs, axis=0)),
            arr2str(np.mean(delta_mfcc, axis=0)),
            arr2str(np.mean(delta2_mfcc, axis=0)),
            arr2str(hist_pred_norm),
            song_ngram_proba,
            instru_ngram_proba,
            song_ngram,
            instru_ngram,
            track_gts[filename],
        ]
        with open(outfile, "a") as filep:
            filep.write(",".join(row_fields) + "\n")
    utils.print_progress_end()
def process_local_feat(indir, file_gts_track, outdir_local, out_feat_global, train):
    """Add delta and double delta to the MFCCs of every annotated track.

    Each file of ``indir`` that has an entry in ``file_gts_track`` is read
    (its first five lines are skipped), the delta and double delta of its
    MFCCs are computed, and the three are written side by side in a file of
    the same name in ``outdir_local``. Finally the frame-level annotations
    are added through ``match_feat_with_song_gt``.

    :param indir: directory holding the MFCC csv files
    :param file_gts_track: csv file mapping track name to groundtruth
    :param outdir_local: directory the local feature files are written to
    :param out_feat_global: currently unused (global feature export is
        disabled in this function)
    :param train: when true the groundtruth keys are used as file names
        as-is and the last column of each row is kept as frame groundtruth;
        otherwise ``.wav.mfcc.csv`` is appended to the keys and the track
        level tag is used
    """
    utils.print_success("Processing local features")

    # Preprocess arg
    indir = utils.abs_path_dir(indir)
    file_gts_track = utils.abs_path_file(file_gts_track)
    filelist = os.listdir(indir)
    outdir_local = utils.abs_path_dir(outdir_local)

    track_gts = {}
    with open(file_gts_track, "r") as filep:
        for line in filep:
            # Strip the line ending first: slicing the label with [:-1]
            # truncated it whenever the newline was missing or extra
            # columns followed.
            fields = line.rstrip("\r\n").split(",")
            if len(fields) < 2:
                continue  # blank or incomplete line
            key = fields[0] if train else fields[0] + ".wav.mfcc.csv"
            track_gts[key] = fields[1]

    nb_files = str(len(filelist))
    for index, filename in enumerate(filelist):
        utils.print_progress_start(
            str(index) + "/" + nb_files + " " + filename)
        if filename not in track_gts:
            continue

        mfccs = []
        groundtruths = []
        with open(indir + filename, "r") as filep:
            for _ in range(5):
                next(filep)  # skip the five header lines
            for line in filep:
                fields = line.split(",")
                # The last column is never part of the MFCC vector.
                mfccs.append(str2arr(fields[:-1]))
                if train:
                    groundtruths.append(fields[-1].rstrip("\r\n"))
        mfccs = np.array(mfccs)
        # NOTE(review): librosa.feature.delta differentiates along the last
        # axis by default; with one row per frame that is the coefficient
        # axis -- confirm this is intended before changing it, since
        # existing models depend on these values.
        delta_mfcc = librosa.feature.delta(mfccs)
        delta2_mfcc = librosa.feature.delta(mfccs, order=2)

        # Write local features in outdir_local
        with open(outdir_local + filename, "w") as filep:
            gt_to_write = ""
            if "i" in track_gts[filename]:
                gt_to_write = ",i"
            elif "s" in track_gts[filename]:
                # Frame groundtruth annotation of songs is postponed to
                # match_feat_with_song_gt, called at the end.
                gt_to_write = ""
            else:
                utils.print_warning(
                    "bayle.py line 231 local frame groundtruth undefined")
            if train:
                for a, b, c, d in zip(mfccs, delta_mfcc, delta2_mfcc,
                                      groundtruths):
                    filep.write(arr2str(a) + "," + arr2str(b) + "," +
                                arr2str(c) + "," + d + "\n")
            else:
                for a, b, c in zip(mfccs, delta_mfcc, delta2_mfcc):
                    filep.write(arr2str(a) + "," + arr2str(b) + "," +
                                arr2str(c) + gt_to_write + "\n")
    utils.print_progress_end()
    utils.print_success("Adding local groundtruths to Songs in Jamendo thanks to Ramona annotations")
    match_feat_with_song_gt(dir_feat=outdir_local, dir_gts="groundtruths/frame_annot_jamendo_ramona/")
    utils.print_success("Done")