Example #1
def preprocess_yaafe_features(dir_features="features/database1/"):
    utils.print_success("Preprocessing YAAFE's features  (approx. 2 minutes)")
    groundtruths = utils.read_groundtruths("groundtruths/database1.csv")
    dir_features = utils.abs_path_dir(dir_features)
    filenames = os.listdir(dir_features)
    dir_tmp = utils.create_dir(utils.create_dir("src/tmp") + "ghosal")
    res_file_name = dir_tmp + "database1.csv"
    res_file = open(res_file_name, "w")
    res_file.write(
        "filename,MFCC_01,MFCC_02,MFCC_03,MFCC_04,MFCC_05,MFCC_06,MFCC_07,MFCC_08,MFCC_09,MFCC_10,MFCC_11,MFCC_12,MFCC_13,tag\n"
    )
    nb_header_lines = 4
    for index, filename in enumerate(filenames):
        utils.print_progress_start(
            str(index + 1) + "/" + str(len(filenames)) + " " + filename)
        with open(dir_features + filename, "r+") as filep:
            tmp_mfcc = np.zeros(shape=(13, 1))
            for line_index, line in enumerate(filep):
                # Skip the first 5 header lines generated by YAAFE
                if line_index > nb_header_lines:
                    index = 0
                    mfccs = line[:-1].split(",")
                    for mfcc in mfccs:
                        tmp_mfcc[index] += float(mfcc)
                        index += 1
            tmp_mfcc /= (line_index - nb_header_lines)
            mfcc_str = ["%.15f" % number for number in tmp_mfcc]
            filen = filename.split(".")[0]
            if filen in groundtruths:
                res_file.write(filen + "," + ",".join(mfcc_str) + "," +
                               groundtruths[filen] + "\n")
    res_file.close()
    return res_file_name
def yaafe_feat_extraction(dir_tracks):
    """Description of yaafe_feat_extraction
    yaafe.py -r 22050 -f "mfcc: MFCC blockSize=2048 stepSize=1024" audio_fn.txt
    """
    utils.print_success("YAAFE features extraction (approx. 8 minutes)")
    
    # Assert Python version
    if sys.version_info.major != 2:
        utils.print_error("Yaafe needs Python 2 environment")
    
    # Assert folder exists
    dir_tracks = utils.abs_path_dir(dir_tracks)    
    
    filelist = os.listdir(dir_tracks)
    dir_feat = utils.create_dir(utils.create_dir("features") + "database1")
    # dir_tmp = utils.create_dir("tmp")
    # dir_yaafe = utils.create_dir(dir_tmp + "yaafe")
    # fn_filelist = dir_yaafe + "filelist.txt"
    dir_current = os.getcwd()
    os.chdir(dir_tracks)
    yaafe_cmd = 'yaafe -r 22050 -f "mfcc: MFCC blockSize=2048 stepSize=1024" '
    yaafe_cmd += "--resample -b " + dir_feat + " "
    for index, filen in enumerate(filelist):
        utils.print_progress_start(str(index+1) + "/" + str(len(filelist)) + " " + filen)
        os.system(yaafe_cmd + filen + "> /dev/null 2>&1")
    utils.print_progress_end()
    os.chdir(dir_current)
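A minimal usage sketch for this example (not part of the original code), assuming utils, numpy and the two functions above are importable, and that the tracks live in tracks/ as in the standalone script further below:

# Hypothetical driver for this example; paths are assumptions
if __name__ == "__main__":
    yaafe_feat_extraction("tracks/")               # writes one *.wav.mfcc.csv per track
    res = preprocess_yaafe_features("features/database1/")
    utils.print_success("Aggregated features written to " + res)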
Example #3
def extract_features(tracks_dir="tracks/", feat_dir="features/"):
    utils.print_success("Extracting features")
    tracks_fn = os.listdir(tracks_dir)
    utils.create_dir(feat_dir)
    feat_dir = utils.create_dir(feat_dir + "svmbff")
    bextract = "bextract -mfcc -zcrs -ctd -rlf -flx -ws 1024 -as 898 -sv -fe "
    for index, filename in enumerate(tracks_fn):
        utils.print_progress_start(
            str(index) + "/" + str(len(tracks_fn)) + " " + filename)
        track_path = filename + ".mf"
        with open(track_path, "w") as filep:
            filep.write(tracks_dir + filename + "\n")
        new_fn = filename.split(".")[0] + ".arff"
        # os.system does not raise when the command fails, so check its
        # return code to detect a missing bextract binary
        ret = os.system(bextract + track_path + " -w " + new_fn +
                        " > /dev/null 2>&1")
        if ret != 0:
            utils.print_info(
                "You have to make marsyas available systemwide, tips:")
            utils.print_info(
                "http://marsyas.info/doc/manual/marsyas-user/Step_002dby_002dstep-building-instructions.html#Step_002dby_002dstep-building-instructions"
            )
            utils.print_info("http://stackoverflow.com/a/21173918")
            utils.print_error("Program exit")
        # print(new_fn)
        # print(feat_dir + " " + new_fn)
        os.rename(new_fn, feat_dir + new_fn)
        # os.rename("MARSYAS_EMPTY" + new_fn, feat_dir + new_fn)
        os.system("rm " + track_path)
    utils.print_progress_end()
    os.system("rm bextract_single.mf")
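For reference (not part of the original code), the per-track collection file written above is a plain .mf list with one audio path per line, and the command issued for each track looks roughly as follows; the track name is illustrative only:

# Illustrative only: for a track named "track01.wav", the generated
# "track01.wav.mf" would contain a single line
#     tracks/track01.wav
# and the command run for it (output suppressed above) is roughly
#     bextract -mfcc -zcrs -ctd -rlf -flx -ws 1024 -as 898 -sv -fe track01.wav.mf -w track01.arff
extract_features(tracks_dir="tracks/", feat_dir="features/")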
Example #4
def marsyas(out_dir, filelist):
    """Definition of marsyas

    bextract is the Marsyas command that extracts the features.
    It takes as input a file listing the audio files to process.
    If an audio file is corrupted, bextract crashes, so it is safer to call
    bextract on one audio file at a time.

    bextract then produces one output file per audio file.
    Those files need to be merged into one common file afterwards.
    """
    dir_feat = utils.create_dir(utils.create_dir(out_dir) + "marsyas/")
    tmp = "tmp.mf"
    for index, filen in enumerate(filelist):
        utils.print_progress_start(str(index+1) + "/" + str(len(filelist)) + " " + filen.split(os.sep)[-1])
        dir_audio = filen.split("/")[:-1]
        filen = filen.split("/")[-1]
        filen = filen.replace(" ", "_")
        filen = filen.replace("'", "_")
        filen = filen.replace('"', "_")
        # Write a one-entry collection file and call bextract on it alone,
        # so that a corrupted audio file cannot abort the whole batch
        with open(tmp, "w") as filep:
            filep.write(os.sep.join(dir_audio) + os.sep + filen + "\n")
        outfilename = dir_feat + filen + ".arff"
        bextract_features(tmp, outfilename, verbose=True)
    os.remove(tmp)
    merge_arff(dir_feat, out_dir + "marsyas.arff")
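bextract_features is not part of this snippet; a minimal sketch of such a wrapper, assuming the same bextract flags as the other examples (an assumption, not the original implementation):

def bextract_features(input_mf, output_arff, verbose=False):
    # Hypothetical wrapper: run Marsyas' bextract on one .mf collection file
    # and write the extracted features to output_arff
    cmd = "bextract -mfcc -zcrs -ctd -rlf -flx -ws 1024 -as 898 -sv -fe "
    cmd += input_mf + " -w " + output_arff
    if not verbose:
        cmd += " > /dev/null 2>&1"
    os.system(cmd)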
Example #5
def run_kea_on_folds(folds_dir):
    """Description of run_kea_on_folds

    Wrapper that runs kea on each train/test fold found in folds_dir
    """
    folds_dir = utils.abs_path_dir(folds_dir)
    out_file = folds_dir + "/results.txt"
    if os.path.exists(folds_dir + "/train_test.arff"):
        train_file = folds_dir + "/train_test.arff"
        test_file = train_file
        run_kea(train_file, test_file, out_file)
    else:
        nb_folds = len([
            name for name in os.listdir(folds_dir)
            if os.path.isfile(os.path.join(folds_dir, name))
        ])
        # Run on multiple train/test
        for index in range(1, int(nb_folds / 2) + 1):
            utils.print_progress_start("Train/Test on fold " + str(index))
            train_file = folds_dir + "/train_" + str(index).zfill(2) + ".arff"
            test_file = folds_dir + "/test_" + str(index).zfill(2) + ".arff"
            out_file = folds_dir + "/results_" + str(index).zfill(2) + ".arff"
            run_kea(train_file, test_file, out_file)
        utils.print_progress_end()
        utils.print_warning("TODO multiprocessing")
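A minimal usage sketch (not from the original code) chaining this wrapper with create_folds from Example #13; the merged ARFF path and the folds/ directory are assumptions:

# Hypothetical driver: split a merged ARFF file into 5 folds and run kea on them
folds_dir = create_folds("features/marsyas.arff", 5, utils.create_dir("folds/"))
run_kea_on_folds(folds_dir)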
Example #6
def preprocess_features(folder):
    utils.print_success("Preprocessing train set")
    folder = utils.abs_path_dir(folder)
    filelist = os.listdir(folder)
    nb_file = str(len(filelist))
    for index, filename in enumerate(filelist):
        utils.print_progress_start(str(index) + "/" + nb_file + " " + filename)
        convert_feats_files(folder + filename)
    utils.print_progress_end()
Example #7
def generate_singing_voice_track(paths):
    """
    @brief      { function_description }
    
    @param      dir_audio  The dir audio
    
    @return     { description_of_the_return_value }
    """
    utils.print_success("Generating singing voice tracks")
    for index, folder in enumerate(paths):
        utils.print_progress_start(
            str(index) + "/" + str(len(paths)) + " " + folder)
        if os.path.isdir(folder) and os.path.exists(folder):
            filelist = os.listdir(folder)
            for filen in filelist:
                nb_error = 0
                if "-bv." in filen:
                    utils.print_error("Backing vocals file found in " + filen)
                if filen.endswith('-ld.wav'):
                    song = filen
                    instru = filen.replace("-ld", "")
                    try:
                        song_samples, song_fs = sf.read(folder + "/" + song)
                    except RuntimeError as run_err:
                        error("RuntimeError", str(run_err))
                        nb_error += 1
                    try:
                        instru_samples, instru_fs = sf.read(folder + "/" +
                                                            instru)
                    except RuntimeError as run_err:
                        error("RuntimeError", str(run_err))
                        nb_error += 1
                    if nb_error == 0:
                        if song_fs != instru_fs:
                            error("SamplingFreq", filen)
                        elif len(instru_samples) != len(song_samples):
                            error("SampleSize", filen)
                        else:
                            voice_samples = song_samples - instru_samples
                            # print(instru)
                            # print(song)
                            # utils.print_error(len(song_samples))
                            # print(song_samples.shape)
                            # print(len(instru_samples))
                            # print(instru_samples.shape)
                            # print(len(voice_samples))
                            # print(voice_samples.shape)
                            sf.write(
                                folder + "/" + filen.replace("nbv-ld", "sv"),
                                voice_samples, song_fs)
                            with open("available.txt", "a") as filep:
                                filep.write(folder + "/" + filen + "\n")
    utils.print_progress_end()
    return "available.txt"
Example #8
def match_feat_with_song_gt(dir_feat, dir_gts):
    """Description of match_feat_gt

    Use groundtruth created by 
    http://www.mathieuramona.com/wp/data/jamendo/ 

    associate to local features
    csv 7041 lines yaafe
    lab 326.973 sec ramona
    Definition of YAAFE from 
    http://yaafe.sourceforge.net/features.html
    """
    utils.print_success("Matching local feat to song/instru groundtruths")
    dir_feat = utils.abs_path_dir(dir_feat)
    dir_gts = utils.abs_path_dir(dir_gts)
    block_size = 1024.
    step_size = 512.
    fech = 22050.
    # Duration covered by one analysis block, in seconds (fech is the sampling rate)
    frame_duration = block_size / fech
    filenames = [fn for fn in os.listdir(dir_gts)]
    for index, filename in enumerate(filenames):
        utils.print_progress_start(str(index) + "/" + str(len(filenames)) + " " + filename)
        # gather groundtruths
        groundtruths = []
        with open(dir_gts + filename, "r") as filep:
            for row in filep:
                line = row.split(" ")
                end = float(line[1])
                if "no" in line[2]:
                    tag = ",i\n"
                else:
                    tag = ",s\n"
                groundtruths.append([end, tag])
        gt_len = len(groundtruths)
        overflow = False
        gt_index = 0
        cpt = 0
        # Write features & groundtruths to file
        str_to_write = ""
        feat_fn = filename.split(".")[0]
        feat_fn += ".wav.mfcc.csv"
        with open(dir_feat + feat_fn, "r") as filep:
            for index, line in enumerate(filep):
                # todo cleanup
                if gt_index < gt_len:
                    if frame_duration * index > groundtruths[gt_index][0]:
                        gt_index += 1
                    if gt_index < gt_len:
                        str_to_write += line[:-1] + groundtruths[gt_index][1]
        with open(dir_feat + feat_fn, "w") as filep:
            filep.write(str_to_write)
    utils.print_progress_end()
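For reference (not part of the original code), each line of the Ramona .lab files is parsed above as "start end label", where a label containing "no" marks instrumental frames; a tiny illustration of that parsing with made-up values:

# Illustrative only: the times and label below are made up
line = "11.336417 22.672834 sing".split(" ")
end = float(line[1])                          # segment end time in seconds
tag = ",i\n" if "no" in line[2] else ",s\n"   # ",i" = instrumental, ",s" = singing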
Example #9
def merge_arff(indir, outfilename):
    """Description of merge_arff

    bextract program from Marsyas generate one output file per audio file
    This function merge them all in one unique file
    Check if analysed file are valid i.e. not empty
    """
    utils.print_success("Preprocessing ARFFs")
    indir = utils.abs_path_dir(indir)
    filenames = os.listdir(indir)
    outfn = open(outfilename, 'w')
    cpt_invalid_fn = 0
    # Write the ARFF header (first 74 lines) once, taken from the first valid file
    for filename in filenames:
        if os.path.isfile(indir + filename):
            new_fn = validate_arff(indir + filename)
            if new_fn:
                with open(new_fn, 'r') as template:
                    nb_line = 74
                    for line in template:
                        if not nb_line:
                            break
                        nb_line -= 1
                        outfn.write(line)
                    break
            else:
                cpt_invalid_fn += 1
    # Append the data lines of every ARFF file to the output file
    cur_file_num = 1
    for filename in filenames:
        if os.path.isfile(indir + filename):
            new_fn = validate_arff(indir + filename)
            if new_fn:
                cur_file_num = cur_file_num + 1
                utils.print_progress_start("Analysing file\t" +
                                           str(cur_file_num))
                fname = open(new_fn, 'r')
                outfn.write("".join(fname.readlines()[74:77]))
                fname.close()
            else:
                cpt_invalid_fn += 1
    utils.print_progress_end()
    outfn.close()
    # os.system("rm " + indir + "*.arff")
    if cpt_invalid_fn:
        utils.print_warning(
            str(cpt_invalid_fn) + " ARFF files with errors found")
    return outfilename
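validate_arff is not included in this snippet; a minimal sketch of the check implied by the docstring (an assumption: the file exists and holds more than the 74-line bextract header):

def validate_arff(filename):
    # Hypothetical check: keep only ARFF files with data beyond the header
    if not os.path.isfile(filename):
        return False
    with open(filename, "r") as filep:
        nb_lines = sum(1 for _ in filep)
    return filename if nb_lines > 74 else False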
Example #10
def extract_features(dir_audio, dir_feat):
    dir_audio = utils.abs_path_dir(dir_audio)
    dir_feat = utils.abs_path_dir(dir_feat)
    filelist = []
    for elem in os.listdir(dir_audio):
        if os.path.isfile(dir_audio + elem):
            filelist.append(dir_audio + elem)
        else:
            for filename in os.listdir(dir_audio + elem):
                if "ld.wav" in filename:
                    filelist.append(dir_audio + elem + "/" + filename)
    # marsyas(dir_feat, filelist)
    for index, filen in enumerate(filelist):
        utils.print_progress_start(str(index+1) + "/" + str(len(filelist)) + " " + filen.split(os.sep)[-1])
        utils.yaafe(filen)
        essentia(dir_feat, filen)
    utils.print_progress_end()
Example #11
def read_files(dir_features):
    utils.print_success("Preprocessing YAAFE's features  (approx. 20 minutes)")
    tmp_gts = utils.read_groundtruths("groundtruths/database2.csv")
    dir_features = utils.abs_path_dir(dir_features)
    filenames = os.listdir(dir_features)
    dir_tmp = utils.create_dir(utils.create_dir("tmp") + "ghosal")
    features = []
    groundtruths = []
    to_print = "/" + str(len(filenames))
    for index, filename in enumerate(filenames):
        utils.print_progress_start(str(index + 1) + to_print)
        # pandas is used here because it is the fastest way to read csv files
        data = pandas.read_csv(dir_features + filename, sep=" ").values
        filen = filename.split("_")[0]
        if filen in tmp_gts:
            groundtruths.append(tmp_gts[filen])
            features.append([sum(x) / len(data) for x in zip(*data)])
    return filenames, features, groundtruths
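An illustrative way to consume the returned lists (the classifier below is only an example and not part of the original code; the features/database2/ path is an assumption):

# Illustrative only: train any scikit-learn classifier on the aggregated features
from sklearn.ensemble import RandomForestClassifier

filenames, features, groundtruths = read_files("features/database2/")
clf = RandomForestClassifier(n_estimators=100)
clf.fit(features, groundtruths)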
import os
import sys
sys.path.insert(0, './src/')
import utils

dir_tracks = "tracks/"
utils.print_success("YAAFE features extraction (approx. 8 minutes)")

# Assert Python version
if sys.version_info.major != 2:
    utils.print_error("Yaafe needs Python 2 environment")

# Assert folder exists
dir_tracks = utils.abs_path_dir(dir_tracks)

filelist = os.listdir(dir_tracks)
dir_feat = utils.create_dir(utils.create_dir("features") + "database1")
# dir_tmp = utils.create_dir("tmp")
# dir_yaafe = utils.create_dir(dir_tmp + "yaafe")
# fn_filelist = dir_yaafe + "filelist.txt"
dir_current = os.getcwd()
os.chdir(dir_tracks)
yaafe_cmd = 'yaafe -r 22050 -f "mfcc: MFCC blockSize=2048 stepSize=1024" '
yaafe_cmd += "--resample -b " + dir_feat + " "
for index, filen in enumerate(filelist):
    utils.print_progress_start(
        str(index + 1) + "/" + str(len(filelist)) + " " + filen)
    os.system(yaafe_cmd + filen + "> /dev/null 2>&1")
utils.print_progress_end()
os.chdir(dir_current)
Example #13
def create_folds(filelist, nb_folds, folds_dir, invert_train_test=False):
    """Description of create_folds

    """
    utils.print_success("Creating folds")
    if nb_folds < 1:
        utils.print_error("Wrong number of folds provided")

    # folds_dir = "/".join(filelist.split("/")[:-1])
    if nb_folds == 1:
        # Train and test set are the same
        folds_dir = folds_dir + "01_fold/"
        utils.create_dir(folds_dir)
        os.system("cp " + filelist + " " + folds_dir + "/train_test.arff")
    else:
        # Create train and test set
        folds_dir = folds_dir + str(nb_folds).zfill(2) + "_folds/"
        utils.create_dir(folds_dir)
        # TODO
        # Read filelist
        # Extract name and tag
        # Separate different tag
        # create folds
        data, meta = arff.loadarff(filelist)
        tags = {}
        for row in data:
            tag = row[-1].decode("ascii")
            if tag in tags:
                tags[tag] += 1
            else:
                tags[tag] = 1
        tags_folds = {}
        tags_folds_index = {}
        for tag in tags:
            tags_folds[tag] = split_number(tags[tag], nb_folds)
            tags_folds_index[tag] = 0
        # Init empty folds
        folds = {}
        for index in range(0, nb_folds):
            folds[index] = ""
        # Fill folds with data
        with open(filelist, "r") as filelist_pointer:
            arff_header = ""
            tmp = ""
            for i, line in enumerate(filelist_pointer):
                utils.print_progress_start("\t" + str(i))
                # Data lines start after the 75-line ARFF header
                if i > 74:
                    # Process ARFF data
                    if "% " in line:
                        # Memorize line
                        tmp += line
                    else:
                        # Read the tag of the data line and append the line
                        # (with its memorized "% " comments) to its fold
                        tag = line.split(",")[-1][:-1]
                        num_fold = tags_folds_index[tag]
                        if tags_folds[tag][num_fold] == 0:
                            tags_folds_index[tag] += 1
                        tags_folds[tag][tags_folds_index[tag]] -= 1
                        folds[tags_folds_index[tag]] += tmp + line
                        tmp = ""
                else:
                    # Save ARFF header lines
                    arff_header += line
            utils.print_progress_end()
        # At this point data has been split up in different part
        # Use this part to create train/test split
        if invert_train_test:
            # Test is bigger than train
            fn_with_min_data = "/train_"
            fn_with_max_data = "/test_"
        else:
            # Train is bigger than test
            fn_with_min_data = "/test_"
            fn_with_max_data = "/train_"
        for index_test in range(0, nb_folds):
            filep = open(
                folds_dir + fn_with_min_data + str(index_test + 1).zfill(2) +
                ".arff", "a")
            filep.write(arff_header + folds[index_test])
            filep.close()
            filep = open(
                folds_dir + fn_with_max_data + str(index_test + 1).zfill(2) +
                ".arff", "a")
            filep.write(arff_header)
            for index_train in range(0, nb_folds):
                if index_train != index_test:
                    filep.write(folds[index_train])
            filep.close()
    return folds_dir
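split_number is referenced above but not shown in this listing; a minimal sketch of what it presumably does, i.e. split a count into nb_folds near-equal integer parts (an assumption, not the original implementation):

def split_number(number, nb_folds):
    # Hypothetical helper: e.g. split_number(10, 3) -> [4, 3, 3]
    base = number // nb_folds
    remainder = number % nb_folds
    return [base + 1 if i < remainder else base for i in range(nb_folds)]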
Example #14
def create_track_feat_testset(folder, infile, outfile, model_file, train=False):
    """Description of create_track_feat_testset
    Need to read each test file
    compute deltas on mfcc in the ram
    predict and predict_proba 
    generate song and instru ngrams and histograms
    Add the mean of mfcc+deltas
    append 109 features vector in feat_track/feat_test.csv
    """

    utils.print_success("Create track feat testset")
    folder = utils.abs_path_dir(folder)
    infile = utils.abs_path_file(infile)
    clf = joblib.load(model_file)
    track_gts = read_gts(infile, separator=",")
    for index, filename in enumerate(track_gts):
        utils.print_progress_start(str(index+1) + "/" + str(len(track_gts)) + " " + filename)
        mfccs_1 = []
        # Test files carry the "_audio_full_mono_22k" suffix, train files do not
        extension = "" if train else "_audio_full_mono_22k"
        extension += ".wav.mfcc.csv"
        with open(folder + filename + extension, "r") as filep:
            if train:
                # Skip the 5 header lines written by YAAFE
                next(filep)
                next(filep)
                next(filep)
                next(filep)
                next(filep)
            for line in filep:
                if train:
                    line = line.split(",")
                else:
                    line = line.split(" ")
                mfccs_1.append(str2arr(line[:-1]))
                # if train:
                #     mfccs.append(str2arr(line[:-1]))
                # else:
                #     mfccs.append(str2arr(line[0:]))
        mfccs = np.array(mfccs_1)
        delta_mfcc = librosa.feature.delta(mfccs)
        delta2_mfcc = librosa.feature.delta(mfccs, order=2)
        tmp = np.append(mfccs, delta_mfcc, axis=1)
        features = np.append(tmp, delta2_mfcc, axis=1)
        preds_proba = clf.predict_proba(features)

        # Histogramm
        nb_hist_class = 10
        numbers = column(preds_proba, 0)
        hist_pred = np.histogram(numbers, nb_hist_class)
        hist_pred_norm = hist_pred[0] / float(sum(hist_pred[0]))

        ngram_threshold = 0.5
        song_ngram_proba = ngram_proba(local_pred=numbers, threshold=ngram_threshold, above_threshold=True)
        instru_ngram_proba = ngram_proba(local_pred=numbers, threshold=ngram_threshold, above_threshold=False)
        
        preds = clf.predict(features)
        song_ngram = ngram(preds, "s")
        instru_ngram = ngram(preds, "i")

        with open(outfile, "a") as filep:
            filep.write(filename[:12] + "," +
                arr2str(np.mean(mfccs, axis=0)) + "," + 
                arr2str(np.mean(delta_mfcc, axis=0)) + "," + 
                arr2str(np.mean(delta2_mfcc, axis=0)) + "," + 
                arr2str(hist_pred_norm) + "," +
                song_ngram_proba + "," + 
                instru_ngram_proba + "," +
                song_ngram + "," + 
                instru_ngram + "," +
                track_gts[filename] + "\n")
    utils.print_progress_end()
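str2arr, arr2str and column are helpers not shown in this listing; minimal sketches consistent with how they are used above (assumptions, not the original implementations):

def str2arr(str_list):
    # Hypothetical helper: convert a list of strings to floats
    return [float(val) for val in str_list]

def arr2str(arr, separator=","):
    # Hypothetical helper: join numeric values into a csv fragment
    return separator.join("%.15f" % val for val in arr)

def column(matrix, i):
    # Hypothetical helper: extract column i from a list of rows / 2D array
    return [row[i] for row in matrix]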
Example #15
def process_local_feat(indir, file_gts_track, outdir_local, out_feat_global, train):
    """Description of process_local_feat
    Add delta and double delta to MFCCs
    """
    
    utils.print_success("Processing local features")
    
    # Preprocess arg
    indir = utils.abs_path_dir(indir)
    file_gts_track = utils.abs_path_file(file_gts_track)
    filelist = os.listdir(indir)
    outdir_local = utils.abs_path_dir(outdir_local)

    track_gts = {}
    with open(file_gts_track, "r") as filep:
        for line in filep:
            line = line.split(",")
            if train:
                index = line[0]
            else:
                index = line[0] + ".wav.mfcc.csv"
            track_gts[index] = line[1][:-1]

    for index, filename in enumerate(filelist):
        utils.print_progress_start(str(index) + "/" + str(len(filelist)) + " " + filename)
        if filename in track_gts:
            mfccs = []
            groundtruths = []
            with open(indir + filename, "r") as filep:
                # Skip the 5 header lines written by YAAFE
                next(filep)
                next(filep)
                next(filep)
                next(filep)
                next(filep)
                for line in filep:
                    line = line.split(",")
                    mfccs.append(str2arr(line[:-1]))
                    if train:
                        groundtruths.append(line[-1][:-1])
            mfccs = np.array(mfccs)
            delta_mfcc = librosa.feature.delta(mfccs)
            delta2_mfcc = librosa.feature.delta(mfccs, order=2)
            # Write local features in outdir_local
            with open(outdir_local + filename, "w") as filep:
                gt_to_write = ""
                if "i" in track_gts[filename]:
                    gt_to_write = ",i"
                elif "s" in track_gts[filename]:
                    # Postpone frame groundtruth annotation to another function later in the code
                    gt_to_write = ""
                else:
                    utils.print_warning("bayle.py line 231 local frame groundtruth undefined")
                if train:
                    for a, b, c, d in zip(mfccs, delta_mfcc, delta2_mfcc, groundtruths):
                        filep.write(arr2str(a) + "," + arr2str(b) + "," + arr2str(c) + "," + d + "\n")
                else:
                    for a, b, c in zip(mfccs, delta_mfcc, delta2_mfcc):
                        filep.write(arr2str(a) + "," + arr2str(b) + "," + arr2str(c) + gt_to_write + "\n")
            # # Write global features in out_feat_global
            # with open(out_feat_global, "a") as filep:
            #     filep.write(filename + "," +
            #         arr2str(np.mean(mfccs, axis=0)) + "," + 
            #         arr2str(np.mean(delta_mfcc, axis=0)) + "," + 
            #         arr2str(np.mean(delta2_mfcc, axis=0)) + "," + 
            #         track_gts[filename] + "\n")
    utils.print_progress_end()
    utils.print_success("Adding local groundtruths to Songs in Jamendo thanks to Ramona annotations")
    match_feat_with_song_gt(dir_feat=outdir_local, dir_gts="groundtruths/frame_annot_jamendo_ramona/")
    utils.print_success("Done")