Пример #1
0
def extract_features(tracks_dir="tracks/", feat_dir="features/"):
    """Extract Marsyas (bextract) features for every file in ``tracks_dir``.

    For each track a temporary one-line ``<filename>.mf`` collection file is
    written in the current working directory, ``bextract`` is run on it and
    the ``.arff`` file it is asked to write is moved into the ``svmbff``
    sub-directory of ``feat_dir``.

    :param tracks_dir: directory holding the audio tracks; it is concatenated
        directly with the file names, so it must end with a path separator
    :param feat_dir: root directory for features; ``svmbff`` is created
        inside it through ``utils.create_dir``
    """
    # Local import: this block cannot rely on a module-level import.
    import subprocess

    utils.print_success("Extracting features")
    tracks_fn = os.listdir(tracks_dir)
    utils.create_dir(feat_dir)
    # NOTE(review): the result is concatenated with a file name below, so
    # utils.create_dir is assumed to return a path ending with a separator.
    feat_dir = utils.create_dir(feat_dir + "svmbff")
    # Argument list instead of a shell string so that file names containing
    # spaces or shell metacharacters are passed through untouched.
    bextract = [
        "bextract", "-mfcc", "-zcrs", "-ctd", "-rlf", "-flx",
        "-ws", "1024", "-as", "898", "-sv", "-fe",
    ]
    nb_tracks = str(len(tracks_fn))
    for index, filename in enumerate(tracks_fn):
        utils.print_progress_start(
            str(index + 1) + "/" + nb_tracks + " " + filename)
        track_path = filename + ".mf"
        new_fn = filename.split(".")[0] + ".arff"
        with open(track_path, "w") as filep:
            filep.write(tracks_dir + filename + "\n")
        try:
            with open(os.devnull, "w") as devnull:
                subprocess.call(
                    bextract + [track_path, "-w", new_fn],
                    stdout=devnull, stderr=devnull)
        except OSError:
            # Raised when the bextract executable cannot be started, i.e.
            # the situation the installation tips below are meant for.
            utils.print_info(
                "You have to make marsyas available systemwide, tips:")
            utils.print_info(
                "http://marsyas.info/doc/manual/marsyas-user/Step_002dby_002dstep-building-instructions.html#Step_002dby_002dstep-building-instructions"
            )
            utils.print_info("http://stackoverflow.com/a/21173918")
            utils.print_error("Program exit")
        finally:
            # The collection file is only needed while bextract runs.
            if os.path.exists(track_path):
                os.remove(track_path)
        if os.path.isfile(new_fn):
            os.rename(new_fn, feat_dir + new_fn)
        else:
            # bextract ran but did not produce the expected output; report
            # it and keep going instead of crashing inside os.rename.
            utils.print_warning("No ARFF file produced for " + filename)
    utils.print_progress_end()
    # Presumably a by-product of bextract; removed only when it exists.
    if os.path.exists("bextract_single.mf"):
        os.remove("bextract_single.mf")
def yaafe_feat_extraction(dir_tracks):
    """Extract MFCCs with YAAFE for every file found in ``dir_tracks``.

    One ``yaafe`` process is started per file, with ``dir_tracks`` as its
    working directory, equivalent to::

        yaafe -r 22050 -f "mfcc: MFCC blockSize=2048 stepSize=1024" \
              --resample -b <dir_feat> <file>

    where ``<dir_feat>`` is the ``database1`` directory created inside
    ``features`` through ``utils.create_dir``.

    :param dir_tracks: directory containing the audio files to analyse
    """
    # Local import: this block cannot rely on a module-level import.
    import subprocess

    utils.print_success("YAAFE features extraction (approx. 8 minutes)")

    # Assert Python version
    if sys.version_info.major != 2:
        utils.print_error("Yaafe needs Python 2 environment")

    # Assert folder exists
    dir_tracks = utils.abs_path_dir(dir_tracks)

    filelist = os.listdir(dir_tracks)
    # NOTE(review): if utils.create_dir returns a relative path, yaafe will
    # resolve it relative to dir_tracks (its working directory) - confirm.
    dir_feat = utils.create_dir(utils.create_dir("features") + "database1")
    # Argument list (no shell) so that file names with spaces are safe.
    yaafe_cmd = [
        "yaafe", "-r", "22050",
        "-f", "mfcc: MFCC blockSize=2048 stepSize=1024",
        "--resample", "-b", dir_feat,
    ]
    nb_files = str(len(filelist))
    with open(os.devnull, "w") as devnull:
        for index, filen in enumerate(filelist):
            utils.print_progress_start(
                str(index + 1) + "/" + nb_files + " " + filen)
            try:
                # cwd= replaces the former os.chdir()/os.chdir() pair, so the
                # caller's working directory is never modified, even when an
                # exception interrupts the loop.
                subprocess.call(
                    yaafe_cmd + [filen], cwd=dir_tracks,
                    stdout=devnull, stderr=devnull)
            except OSError as err:
                # The yaafe executable could not be started at all; there is
                # no point in trying the remaining files.
                utils.print_error("Unable to run yaafe: " + str(err))
                break
    utils.print_progress_end()
Пример #3
0
def run_kea_on_folds(folds_dir):
    """Run kea on the train/test ARFF files stored in ``folds_dir``.

    When ``train_test.arff`` exists in the folder it is used as both the
    train and the test set and the results go to ``results.txt``.
    Otherwise the folds are expected as ``train_NN.arff`` / ``test_NN.arff``
    pairs numbered from 01, and fold NN writes ``results_NN.arff``.

    The number of folds is the number of ``train_*.arff`` files. Counting
    every file of the folder would over-count as soon as ``results_NN.arff``
    files written by a previous run are present.

    :param folds_dir: directory containing the fold files
    """
    folds_dir = utils.abs_path_dir(folds_dir)
    single_file = folds_dir + "/train_test.arff"
    if os.path.exists(single_file):
        run_kea(single_file, single_file, folds_dir + "/results.txt")
        return

    nb_folds = sum(
        1 for name in os.listdir(folds_dir)
        if name.startswith("train_") and name.endswith(".arff")
        and os.path.isfile(os.path.join(folds_dir, name))
    )
    # Run on multiple train/test
    for index in range(1, nb_folds + 1):
        utils.print_progress_start("Train/Test on fold " + str(index))
        fold_id = str(index).zfill(2)
        train_file = folds_dir + "/train_" + fold_id + ".arff"
        test_file = folds_dir + "/test_" + fold_id + ".arff"
        out_file = folds_dir + "/results_" + fold_id + ".arff"
        run_kea(train_file, test_file, out_file)
    utils.print_progress_end()
    utils.print_warning("TODO multiprocessing")
Пример #4
0
def preprocess_features(folder):
    """Run ``convert_feats_files`` on every entry of ``folder``.

    :param folder: directory whose entries are converted; it is normalised
        with ``utils.abs_path_dir`` and then concatenated directly with each
        entry name
    """
    utils.print_success("Preprocessing train set")
    folder = utils.abs_path_dir(folder)
    entries = os.listdir(folder)
    total = str(len(entries))
    position = 0
    for entry in entries:
        counter = str(position) + "/" + total
        utils.print_progress_start(counter + " " + entry)
        convert_feats_files(folder + entry)
        position += 1
    utils.print_progress_end()
Пример #5
0
def generate_singing_voice_track(paths):
    """Build singing-voice tracks by subtracting instrumentals from songs.

    In every directory of ``paths``, each file whose name ends with
    ``-ld.wav`` is read as the song and the file with the same name minus
    ``-ld`` is read as its instrumental. When both can be read, share the
    same sampling frequency and the same number of samples, their
    difference is written next to them under the song name with ``nbv-ld``
    replaced by ``sv``, and the song path is appended to ``available.txt``
    in the current working directory.

    Read failures and mismatches are reported through ``error``; files whose
    name contains ``-bv.`` are reported through ``utils.print_error``.

    :param paths: sequence of directories to scan; entries that are not
        existing directories are skipped
    :return: the string ``"available.txt"``
    """
    utils.print_success("Generating singing voice tracks")
    for position, folder in enumerate(paths):
        utils.print_progress_start(
            str(position) + "/" + str(len(paths)) + " " + folder)
        if not os.path.isdir(folder):
            continue
        base = folder + "/"
        for filen in os.listdir(folder):
            if "-bv." in filen:
                utils.print_error("Backing vocals file found in " + filen)
            if not filen.endswith('-ld.wav'):
                continue

            instru = filen.replace("-ld", "")
            nb_error = 0
            # Both reads are always attempted so that each failure is logged.
            try:
                song_samples, song_fs = sf.read(base + filen)
            except RuntimeError as run_err:
                error("RuntimeError", str(run_err))
                nb_error += 1
            try:
                instru_samples, instru_fs = sf.read(base + instru)
            except RuntimeError as run_err:
                error("RuntimeError", str(run_err))
                nb_error += 1
            if nb_error != 0:
                continue

            if song_fs != instru_fs:
                error("SamplingFreq", filen)
                continue
            if len(instru_samples) != len(song_samples):
                error("SampleSize", filen)
                continue

            voice_samples = song_samples - instru_samples
            # NOTE(review): when the name does not contain "nbv-ld" the
            # replace() is a no-op and this appears to write over the song
            # file itself - confirm the naming convention.
            sf.write(base + filen.replace("nbv-ld", "sv"),
                     voice_samples, song_fs)
            with open("available.txt", "a") as filep:
                filep.write(base + filen + "\n")
    utils.print_progress_end()
    return "available.txt"
Пример #6
0
def match_feat_with_song_gt(dir_feat, dir_gts):
    """Append a song/instru tag to every row of the local feature files.

    Use groundtruth created by
    http://www.mathieuramona.com/wp/data/jamendo/

    associate to local features
    csv 7041 lines yaafe
    lab 326.973 sec ramona
    Definition of YAAFE from
    http://yaafe.sourceforge.net/features.html

    For each annotation file of ``dir_gts`` the feature file named
    ``<name before the first dot>.wav.mfcc.csv`` in ``dir_feat`` is read and
    rewritten in place: every row gets ``,i`` when the segment covering it
    has a label containing "no", ``,s`` otherwise. Rows located after the
    end of the last annotated segment are dropped.

    Annotation lines are split on single spaces; the second field is read
    as the segment end time and the third as its label.

    :param dir_feat: directory of the feature files (modified in place)
    :param dir_gts: directory of the annotation files
    """
    utils.print_success("Matching local feat to song/instru groundtruths")
    dir_feat = utils.abs_path_dir(dir_feat)
    dir_gts = utils.abs_path_dir(dir_gts)
    block_size = 1024.
    fech = 22050.
    # Time step between two consecutive feature rows, compared below with
    # the segment end times (presumably seconds, given fech in Hz).
    frame_duration = block_size / fech
    filenames = os.listdir(dir_gts)
    nb_files = str(len(filenames))
    for index, filename in enumerate(filenames):
        utils.print_progress_start(
            str(index + 1) + "/" + nb_files + " " + filename)
        # gather groundtruths as (segment end, tag to append)
        groundtruths = []
        with open(dir_gts + filename, "r") as filep:
            for row in filep:
                if not row.strip():
                    # tolerate blank (e.g. trailing) lines
                    continue
                fields = row.split(" ")
                tag = ",i\n" if "no" in fields[2] else ",s\n"
                groundtruths.append((float(fields[1]), tag))
        gt_len = len(groundtruths)
        gt_index = 0
        # Write features & groundtruths to file
        labelled_rows = []
        feat_path = dir_feat + filename.split(".")[0] + ".wav.mfcc.csv"
        with open(feat_path, "r") as filep:
            for frame_index, line in enumerate(filep):
                frame_time = frame_duration * frame_index
                # Skip every segment that ended before this frame, not just
                # one, so that segments shorter than a frame cannot make the
                # labels lag behind.
                while (gt_index < gt_len
                       and frame_time > groundtruths[gt_index][0]):
                    gt_index += 1
                if gt_index >= gt_len:
                    # past the last annotated segment: remaining rows are
                    # dropped
                    break
                # rstrip instead of [:-1] so a final line without newline
                # does not lose its last character
                labelled_rows.append(
                    line.rstrip("\n") + groundtruths[gt_index][1])
        with open(feat_path, "w") as filep:
            filep.write("".join(labelled_rows))
    utils.print_progress_end()
Пример #7
0
def merge_arff(indir, outfilename):
    """Merge the per-track ARFF files of ``indir`` into ``outfilename``.

    bextract program from Marsyas generate one output file per audio file
    This function merge them all in one unique file
    Check if analysed file are valid i.e. not empty

    Every regular file of ``indir`` is passed once to ``validate_arff``,
    which is expected to return the path to read, or a falsy value when the
    file is invalid. The first 74 lines of the first valid file are copied
    as the ARFF header, then lines 75 to 77 of every valid file are
    appended.

    :param indir: directory containing the ARFF files
    :param outfilename: path of the merged file (overwritten)
    :return: ``outfilename``
    """
    nb_header_lines = 74  # lines copied once from the first valid file
    nb_data_lines = 3     # lines copied from each file after the header

    utils.print_success("Preprocessing ARFFs")
    indir = utils.abs_path_dir(indir)
    filenames = os.listdir(indir)
    cpt_invalid_fn = 0
    nb_valid = 0
    with open(outfilename, 'w') as outfn:
        for filename in filenames:
            path = indir + filename
            if not os.path.isfile(path):
                continue
            # Each file is validated exactly once, so an invalid file is
            # counted a single time.
            new_fn = validate_arff(path)
            if not new_fn:
                cpt_invalid_fn += 1
                continue
            nb_valid += 1
            utils.print_progress_start("Analysing file\t" + str(nb_valid))
            with open(new_fn, 'r') as arff:
                lines = arff.readlines()
            if nb_valid == 1:
                # Write first lines of ARFF template file
                outfn.writelines(lines[:nb_header_lines])
            outfn.writelines(
                lines[nb_header_lines:nb_header_lines + nb_data_lines])
        utils.print_progress_end()
    if cpt_invalid_fn:
        utils.print_warning(
            str(cpt_invalid_fn) + " ARFF files with errors found")
    return outfilename
Пример #8
0
def extract_features(dir_audio, dir_feat):
    """Run the yaafe and essentia extractors on the audio found in ``dir_audio``.

    Regular files located directly in ``dir_audio`` are always processed.
    Every other entry is listed as a sub-directory, and only its entries
    whose name contains ``ld.wav`` are processed.

    :param dir_audio: root directory of the audio files
    :param dir_feat: directory handed to ``essentia`` together with each
        audio path
    """
    dir_audio = utils.abs_path_dir(dir_audio)
    dir_feat = utils.abs_path_dir(dir_feat)

    # Collect the audio paths first so that progress can show a total.
    audio_paths = []
    for entry in os.listdir(dir_audio):
        entry_path = dir_audio + entry
        if os.path.isfile(entry_path):
            audio_paths.append(entry_path)
            continue
        audio_paths.extend(
            entry_path + "/" + name
            for name in os.listdir(entry_path)
            if "ld.wav" in name
        )

    for position, audio_path in enumerate(audio_paths, 1):
        short_name = audio_path.split(os.sep)[-1]
        utils.print_progress_start(
            str(position) + "/" + str(len(audio_paths)) + " " + short_name)
        utils.yaafe(audio_path)
        essentia(dir_feat, audio_path)
    utils.print_progress_end()
import os
import subprocess
import sys
sys.path.insert(0, './src/')
import utils

# Extract MFCCs with YAAFE for every file of the tracks folder. One yaafe
# process is started per file, with the tracks folder as working directory.
dir_tracks = "tracks/"
utils.print_success("YAAFE features extraction (approx. 8 minutes)")

# Assert Python version
if sys.version_info.major != 2:
    utils.print_error("Yaafe needs Python 2 environment")

# Assert folder exists
dir_tracks = utils.abs_path_dir(dir_tracks)

filelist = os.listdir(dir_tracks)
# NOTE(review): if utils.create_dir returns a relative path, yaafe will
# resolve it relative to dir_tracks (its working directory) - confirm.
dir_feat = utils.create_dir(utils.create_dir("features") + "database1")
# Argument list (no shell) so that file names with spaces are safe.
yaafe_cmd = [
    "yaafe", "-r", "22050",
    "-f", "mfcc: MFCC blockSize=2048 stepSize=1024",
    "--resample", "-b", dir_feat,
]
with open(os.devnull, "w") as devnull:
    for index, filen in enumerate(filelist):
        utils.print_progress_start(
            str(index + 1) + "/" + str(len(filelist)) + " " + filen)
        try:
            # cwd= makes yaafe run inside the tracks folder without changing
            # the working directory of this script.
            subprocess.call(
                yaafe_cmd + [filen], cwd=dir_tracks,
                stdout=devnull, stderr=devnull)
        except OSError as err:
            # The yaafe executable could not be started at all; there is no
            # point in trying the remaining files.
            utils.print_error("Unable to run yaafe: " + str(err))
            break
utils.print_progress_end()
Пример #10
0
def create_track_feat_testset(folder, infile, outfile, model_file, train=False):
    """Append one track-level feature row per track of ``infile`` to ``outfile``.

    For every track listed by ``read_gts(infile, separator=",")`` the MFCC
    file ``<folder><track><extension>`` is read, first and second order
    deltas are computed in memory, and the classifier loaded from
    ``model_file`` is applied to the stacked MFCC + delta + delta2 rows.
    The row appended to ``outfile`` holds, comma separated: the first 12
    characters of the track name, the column means of the MFCCs and of both
    deltas, a normalised 10-bin histogram of the first predicted
    probability column, the song and instru n-gram descriptors computed on
    those probabilities (threshold 0.5) and on the predicted labels, and
    the track groundtruth.

    :param folder: directory of the MFCC files
    :param infile: file listing the tracks and their groundtruth
    :param outfile: file the rows are appended to
    :param model_file: path given to ``joblib.load``
    :param train: when true, files are named ``<track>.wav.mfcc.csv``, are
        comma separated and start with five lines that are skipped;
        otherwise they are named ``<track>_audio_full_mono_22k.wav.mfcc.csv``
        and are space separated. In both cases the last field of each line
        is discarded.
    """
    utils.print_success("Create track feat testset")
    folder = utils.abs_path_dir(folder)
    infile = utils.abs_path_file(infile)
    clf = joblib.load(model_file)
    track_gts = read_gts(infile, separator=",")

    # File layout depends only on the mode, not on the track.
    if train:
        extension = ".wav.mfcc.csv"
        field_sep = ","
        nb_skipped_lines = 5
    else:
        extension = "_audio_full_mono_22k" + ".wav.mfcc.csv"
        field_sep = " "
        nb_skipped_lines = 0

    nb_hist_class = 10
    ngram_threshold = 0.5

    for position, filename in enumerate(track_gts, 1):
        utils.print_progress_start(
            str(position) + "/" + str(len(track_gts)) + " " + filename)

        with open(folder + filename + extension, "r") as filep:
            for _ in range(nb_skipped_lines):
                next(filep)
            rows = [str2arr(line.split(field_sep)[:-1]) for line in filep]
        mfccs = np.array(rows)

        # NOTE(review): librosa.feature.delta works along its last axis by
        # default while mfccs holds one row per file line - confirm this is
        # the intended axis before changing anything, since the loaded
        # model is applied to exactly these features.
        delta_mfcc = librosa.feature.delta(mfccs)
        delta2_mfcc = librosa.feature.delta(mfccs, order=2)
        features = np.concatenate((mfccs, delta_mfcc, delta2_mfcc), axis=1)

        # Histogram of the first probability column, normalised to sum to 1
        numbers = column(clf.predict_proba(features), 0)
        counts = np.histogram(numbers, nb_hist_class)[0]
        hist_pred_norm = counts / float(sum(counts))

        song_ngram_proba = ngram_proba(
            local_pred=numbers, threshold=ngram_threshold,
            above_threshold=True)
        instru_ngram_proba = ngram_proba(
            local_pred=numbers, threshold=ngram_threshold,
            above_threshold=False)

        preds = clf.predict(features)
        song_ngram = ngram(preds, "s")
        instru_ngram = ngram(preds, "i")

        fields = [
            filename[:12],
            arr2str(np.mean(mfccs, axis=0)),
            arr2str(np.mean(delta_mfcc, axis=0)),
            arr2str(np.mean(delta2_mfcc, axis=0)),
            arr2str(hist_pred_norm),
            song_ngram_proba,
            instru_ngram_proba,
            song_ngram,
            instru_ngram,
            track_gts[filename],
        ]
        with open(outfile, "a") as filep:
            filep.write(",".join(fields) + "\n")
    utils.print_progress_end()
Пример #11
0
def process_local_feat(indir, file_gts_track, outdir_local, out_feat_global, train):
    """Add delta and double delta to the MFCC files of ``indir``.

    Track groundtruths are read from ``file_gts_track`` (comma separated,
    name first, groundtruth second). Every file of ``indir`` that has a
    groundtruth is read after skipping its first five lines, and a file
    with the same name is written in ``outdir_local`` with one row per
    frame: MFCCs, deltas, double deltas, then

    * in train mode, the last field of the input row;
    * otherwise ``,i`` when the track groundtruth contains "i" and nothing
      when it contains "s" (those frames are tagged by
      ``match_feat_with_song_gt``, called at the end on ``outdir_local``).

    :param indir: directory of the input MFCC files
    :param file_gts_track: file of the track groundtruths
    :param outdir_local: directory receiving the local feature files
    :param out_feat_global: not used by this function
    :param train: when true, groundtruth names are used as is and the last
        field of each input row is kept as frame groundtruth; otherwise
        ``.wav.mfcc.csv`` is appended to the groundtruth names
    """
    utils.print_success("Processing local features")

    # Preprocess arg
    indir = utils.abs_path_dir(indir)
    file_gts_track = utils.abs_path_file(file_gts_track)
    filelist = os.listdir(indir)
    outdir_local = utils.abs_path_dir(outdir_local)

    # Map feature file name -> track groundtruth
    key_suffix = "" if train else ".wav.mfcc.csv"
    track_gts = {}
    with open(file_gts_track, "r") as filep:
        for row in filep:
            fields = row.split(",")
            track_gts[fields[0] + key_suffix] = fields[1][:-1]

    for position, filename in enumerate(filelist):
        utils.print_progress_start(
            str(position) + "/" + str(len(filelist)) + " " + filename)
        if filename not in track_gts:
            continue

        mfccs = []
        groundtruths = []
        with open(indir + filename, "r") as filep:
            for _ in range(5):
                next(filep)
            for row in filep:
                fields = row.split(",")
                mfccs.append(str2arr(fields[:-1]))
                if train:
                    groundtruths.append(fields[-1][:-1])
        mfccs = np.array(mfccs)
        delta_mfcc = librosa.feature.delta(mfccs)
        delta2_mfcc = librosa.feature.delta(mfccs, order=2)

        # Write local features in outdir_local
        track_gt = track_gts[filename]
        with open(outdir_local + filename, "w") as filep:
            if "i" in track_gt:
                gt_to_write = ",i"
            else:
                # "s": frame groundtruths are added later on by
                # match_feat_with_song_gt
                gt_to_write = ""
                if "s" not in track_gt:
                    utils.print_warning("bayle.py line 231 local frame groundtruth undefined")
            if train:
                suffixes = ["," + frame_gt for frame_gt in groundtruths]
            else:
                suffixes = [gt_to_write] * len(mfccs)
            for mfcc, delta, delta2, suffix in zip(
                    mfccs, delta_mfcc, delta2_mfcc, suffixes):
                filep.write(
                    arr2str(mfcc) + "," + arr2str(delta) + "," +
                    arr2str(delta2) + suffix + "\n")
    utils.print_progress_end()
    utils.print_success("Adding local groundtruths to Songs in Jamendo thanks to Ramona annotations")
    match_feat_with_song_gt(dir_feat=outdir_local, dir_gts="groundtruths/frame_annot_jamendo_ramona/")
    utils.print_success("Done")