Example #1
import sys

import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.tree import DecisionTreeClassifier

# `utils` and `read_file` are project-local helpers assumed to be in scope
def precision_100percent(train, test):
    """Description of precision_100percent

    ..todo::
        1 Find best clf with default param
        2 vary param of best clf and find best param
        3 use best param and best clf to find recall for 100 percent precision
    """
    utils.print_success("Find Recall for best Precision for each tag")
    train = utils.abs_path_file(train)
    test = utils.abs_path_file(test)
    train_features, train_groundtruths = read_file(train)
    test_features, test_groundtruths = read_file(test)
    classifiers = {
        # "RandomForest": RandomForestClassifier(),  # n_estimators=5
        "DecisionTree": DecisionTreeClassifier()  # max_depth=10
        # "SVM": SVC(kernel="linear", C=0.0205),
        # "ExtraTreesClassifier": ExtraTreesClassifier(n_estimators=5, criterion="entropy", max_features="log2", max_depth=9),
        # "LogisticRegression": LogisticRegression()
    }
    tags = list(set(test_groundtruths))
    nb_tag = len(tags)
    step = 0.01
    # for index, tag in enumerate(["i"]):
    for index, tag in enumerate(tags):
        utils.print_success("Tag " + tag)
        max_precision = 0
        max_recall = 0
        max_f_measure = 0
        max_clf = ""
        max_weight = 0
        for key in classifiers:
            clf = classifiers[key]
            # for weight in np.arange(0., 0.01, 0.000001):
            # for weight in np.arange(step, 1-step, step):
            for weight in np.arange(0.0, 1.0, step):
                print("Classifier " + key + " & Weight " + str(weight))
                sys.stdout.write("\033[F")
                sys.stdout.write("\033[K")
                clf.set_params(class_weight={"i":weight, "s":1-weight})
                clf.fit(train_features, train_groundtruths)
                predictions = clf.predict(test_features)
                precision = precision_score(test_groundtruths, predictions, average=None)[index]
                if precision >= max_precision:
                    recall = recall_score(test_groundtruths, predictions, average=None)[index]
                    # if recall > max_recall:
                    max_precision = precision
                    max_recall = recall
                    max_f_measure = f1_score(test_groundtruths, predictions, average=None)[index]
                    max_weight = weight
                    max_clf = key
        sys.stdout.write("\033[K")
        utils.print_info("\tClassifier " + str(max_clf))
        utils.print_info("\tPrecision  " + str(max_precision))
        utils.print_info("\tRecall     " + str(max_recall))
        utils.print_info("\tF-Measure  " + str(max_f_measure))
        utils.print_info("\tWeight     " + str(max_weight))
Example #2
def get_gts():
    gt_filen = "../data/filelist.csv"
    utils.abs_path_file(gt_filen)
    gts = {}
    with open(gt_filen, "r") as filep:
        next(filep)
        for line in filep:
            if ",male," in line or ",female," in line:
                row = line.split(",")
                gts[row[0]] = row[3][0]
    return gts
Example #3
def read_file(filename):
    filename = utils.abs_path_file(filename)
    with open(filename, "r") as filep:
        next(filep)
        first_line = True
        groundtruths = []
        filenames = []
        for row in filep:
            splitted_row = row.split(",")
            filenames.append(splitted_row[0])
            if first_line:
                first_line = False
                if "s" in splitted_row[-1][:-1]:
                    groundtruths = np.array([True])
                elif "i" in splitted_row[-1][:-1]:
                    groundtruths = np.array([False])
                features = np.array([splitted_row[1:-1]]).astype(np.float)
            else:
                cur_feat = np.array([splitted_row[1:-1]]).astype(float)
                if "s" in splitted_row[-1][:-1]:
                    groundtruths = np.append(groundtruths, np.array([True]))
                elif "i" in splitted_row[-1][:-1]:
                    groundtruths = np.append(groundtruths, np.array([False]))
                features = np.append(features, cur_feat, axis=0)
    return filenames, features, groundtruths
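Appending to a NumPy array row by row re-allocates on every call; a variant under the same assumed layout (filename,feat1,...,featn,tag with tags "s"/"i") accumulates plain lists and converts once:

import numpy as np

def read_file_fast(filename):
    filenames, groundtruths, features = [], [], []
    with open(filename, "r") as filep:
        next(filep)  # skip the csv header
        for row in filep:
            cols = row.rstrip("\n").split(",")
            filenames.append(cols[0])
            groundtruths.append("s" in cols[-1])  # "s" -> True, "i" -> False
            features.append([float(v) for v in cols[1:-1]])
    return filenames, np.array(features), np.array(groundtruths)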
Example #4
def new_algo_final(indir, file_gts_track):
    utils.print_success("Approx. time ~6 hours.")
    # Preprocess arg
    indir = utils.abs_path_dir(indir)
    file_gts_track = utils.abs_path_file(file_gts_track)
    dir_tmp = utils.create_dir(utils.create_dir("src/tmp") + "bayle")
    feat_frame_train = utils.create_dir(dir_tmp + "feat_frame_train")
    feat_frame_test = utils.create_dir(dir_tmp + "feat_frame_test")
    outdir_global = utils.create_dir(dir_tmp + "feat_track")
    feat_train = outdir_global + "train.csv"
    feat_test = outdir_global + "test.csv"
    models_dir = utils.create_dir(dir_tmp + "models")
    loc_feat_testset_dirpath = "features/database2/"
    filelist_train = "groundtruths/database1.csv"
    filelist_test = "groundtruths/database2.csv"
    models_global = utils.create_dir(dir_tmp + "models_track")

    process_local_feat(indir, file_gts_track, outdir_local=feat_frame_train, out_feat_global=feat_train, train=False)
    classify.create_models(outdir=models_dir, train_dir=feat_frame_train, separator=",", classifiers="RandomForest")

    """
    Create features at track scale for the train set
    Features: MFCC + Delta + Double Delta + ngrams + hist
    """
    model_file = "src/tmp/bayle/models/RandomForest/RandomForest.pkl"
    model_file = "/media/sf_DATA/ReproducibleResearchIEEE2017/src/tmp/bayle/models/RandomForest/RandomForest.pkl"
    create_track_feat_testset(indir, filelist_train, feat_train, model_file, train=True)

    # # 15h28m44s to 19h08m28s Done in 13184117ms
    create_track_feat_testset(loc_feat_testset_dirpath, filelist_test, feat_test, model_file)  

    classify.create_models(outdir=models_global, train_file=feat_train, classifiers="RandomForest")
    process_results(feat_train, feat_test)
Example #5
def read_item_tag(filename):
    filename = utils.abs_path_file(filename)
    data = {}
    with open(filename, "r") as filep:
        for line in filep:
            line = line.split("\t")
            # key: the filename with its first two characters (e.g. a leading "./") dropped
            data[line[0][2:]] = line[1]
    return data
Example #6
def classify(train=None,
             test=None,
             data=None,
             res_dir="res/",
             disp=True,
             outfilename=None):
    """Description of compare
    compare multiple classifier and display the best one
    """
    utils.print_success("Comparison of differents classifiers")
    if data is not None:
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    classifiers = {
        "RandomForest": RandomForestClassifier(n_jobs=-1)
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        # "KNeighbors":KNeighborsClassifier(3),
        # "GaussianProcess":GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree":DecisionTreeClassifier(max_depth=5),
        # "MLP":MLPClassifier(),
        # "AdaBoost":AdaBoostClassifier(),
        # "GaussianNB":GaussianNB(),
        # "QDA":QuadraticDiscriminantAnalysis(),
        # "SVM":SVC(kernel="linear", C=0.025),
        # "GradientBoosting":GradientBoostingClassifier(),
        # "ExtraTrees":ExtraTreesClassifier(),
        # "LogisticRegression":LogisticRegression(),
        # "LinearDiscriminantAnalysis":LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
    return predictions
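A hypothetical call through the in-memory data path; the feature values and tags below are invented for illustration, and the project-local utils helpers are assumed importable:

dataset = {
    "train_features": [[0.1, 0.2], [0.9, 0.8], [0.2, 0.1], [0.8, 0.9]],
    "train_groundtruths": ["i", "s", "i", "s"],
    "test_features": [[0.15, 0.25], [0.85, 0.75]],
    "test_groundtruths": ["i", "s"],
}
predictions = classify(data=dataset)  # fits the RandomForest and returns its predictions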
Example #7
def process_results(in_fn, out_fn):
    in_fn = utils.abs_path_file(in_fn)
    with open(in_fn, "r") as filep, open(out_fn, "w") as out_fp:
        for index, line in enumerate(filep):
            if index % 2:  # results sit on odd-numbered lines
                row = line[:-1].split("\t")
                out_fp.write(row[0].split("_")[0] + "," + row[2] + "\n")
Example #8
def figures1bd(indir, file_gts_track):
    """Description of figures1bd

    infile is formated like:
    /media/sf_github/yann/train/01 - 01 Les Jardins Japonais.wav.mfcc.csv
    feat1 feat2 ... featn tag1
    feat1 feat2 ... featn tag2
    ...
    feat1 feat2 ... featn tag2

    0 Input the local extracted features from YAAFE
        13 MFCC per frame
        186 musical pieces as train set
    1 Computes delta and double delta (39 features per frame)
    2 Gather global mean (39 features per musical pieces)
    3 train on mfcc & deltas (39 feat/frame) to output global predictions
    4 Use global preds to compute song and instru n-grams and histogramm
        which add 70 feat/track
        lead to a total of 109 feat/track
    5 Fit on 109x186
    6 predict (or predict_proba) on 41491 track 
    """

    # Preprocess arg
    indir = utils.abs_path_dir(indir)
    file_gts_track = utils.abs_path_file(file_gts_track)
    feat_frame_train = "feat_frame_train/"
    utils.create_dir(feat_frame_train)
    feat_frame_test = "feat_frame_test/"
    utils.create_dir(feat_frame_test)
    outdir_global = "feat_track/"
    utils.create_dir(outdir_global)
    feat_train = outdir_global + "train.csv"
    feat_test = outdir_global + "test.csv"
    models_dir = "models/"
    utils.create_dir(models_dir)
    loc_feat_testset_dirpath = "/media/sf_DATA/Datasets/Simbals/yaafe/results/processed/"
    filelist_test = "filelist_test.tsv"
    filelist_train = "filelist_train.tsv"
    models_global = "models_track/"
    utils.create_dir(models_global)

    # process_local_feat(indir, file_gts_track, feat_frame_train, feat_train, train=True)    
    # classify.create_models(outdir=models_dir, train_dir=feat_frame_train, separator=",")
    # create_track_feat_testset(indir, filelist_train, feat_train, train=True)

    # 15h28m44s to 19h08m28s Done in 13184117ms
    # create_track_feat_testset(loc_feat_testset_dirpath, filelist_test, feat_test)  

    # classify.create_models(outdir=models_global, train_file=feat_train)
    # classify.test_models_parallel(
        # models_dir=models_global,
        # out_dir="results/",
        # test_file=feat_test)
    
    # Display results
    reproduce.plot_results("results/")
Example #9
def extract_features(files="available.txt"):
    utils.print_success("Extracting features")
    files = utils.abs_path_file(files)
    dir_feat = "/media/sf_DATA/ISMIR2017/features/gender/"
    with open(files, "r") as filep:
        for line in filep:
            line = line[:-1]
            utils.yaafe(line, dir_feat + "song/", verbose=True)
            utils.yaafe(line.replace("nbv-ld", "sv"),
                        dir_feat + "sv/",
                        verbose=True)
Example #10
def read_train_file(filename):
    """
    Read ONE train file
    """
    groundtruths = []
    features = []
    filename = utils.abs_path_file(filename)
    with open(filename, "r") as filep:
        for line in filep:
            line = line.split(",")
            groundtruths.append(line[-1][:-1])
            features.append(line[1:-1])
    return features, groundtruths
Example #11
def merge_gt_feat(gt_filen, feat_filen, train_filen):
    """
    @brief      Read the files containing ground truths and features and merge
                them to be used for classification
    
    @param      gt_filen    The ground truths filename
    @param      feat_filen  The features filename
    """
    utils.print_success("Adding groundtruth")
    feat_filen = utils.abs_path_file(feat_filen)
    gt_filen = utils.abs_path_file(gt_filen)
    gts = read_gts(gt_filen)
    output = open(train_filen, "w")
    with open(feat_filen, "r") as feat:
        cur_id = ""
        for line in feat:
            if "filename" in line:
                m = re.search(r"\d{2,10}", line)
                cur_id = m.group()
            elif len(cur_id) > 1 and "srate" not in line and cur_id in gts:
                output.write(
                    str(cur_id) + "," + line[:-4] + gts[cur_id] + "\n")
    output.close()
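The id extraction relies on re.search returning the first run of 2 to 10 digits; for example, on a hypothetical header line:

import re

m = re.search(r"\d{2,10}", "filename: /data/0042137_audio.wav")
print(m.group())  # "0042137"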
Example #12
def read_item_tag(filename):
    """Description of read_item_tag

    example line:
    filename,tag
    """

    filename = utils.abs_path_file(filename)
    groundtruths = {}
    with open(filename, "r") as filep:
        for row in filep:
            line = row.split(",")
            groundtruths[line[0]] = line[1][:-1]
    return groundtruths
Example #13
def read_file(filename):
    """Description of read_file

    train/test example line:
    filename,feat1,feat2,...,featn,tag
    """
    filename = utils.abs_path_file(filename)
    groundtruths = []
    features = []
    with open(filename, "r") as filep:
        for row in filep:
            line = row.split(",")
            groundtruths.append(line[-1][:-1])
            features.append([float(i) for i in line[1:-1]])
    return features, groundtruths
Example #14
def read_preds(filename):
    """Description of read_preds

    ex file:
    ISRC,tag
    """
    filename = utils.abs_path_file(filename)
    isrcs = {}
    with open(filename, "r") as filep:
        for row in filep:
            line = row.split(",")
            # print(line)
            isrcs[line[0]] = float(line[1])
            # isrcs[line[0]] = 1.0-float(line[1])
    return isrcs
Example #15
def read_preds(filename):
    filename = utils.abs_path_file(filename)
    predictions = {}
    with open(filename, "r") as filep:
        for index, line in enumerate(filep):
            if index % 2:  # predictions sit on odd-numbered lines
                line = line.split("\t")
                name = line[0].split("/")[-1]
                pred = float(line[-1])
                # probability above 0.5 -> song ("s"), otherwise instrumental ("i")
                if pred > 0.5:
                    predictions[name] = "s"
                else:
                    predictions[name] = "i"
    return predictions
Example #16
def read_gts(filename):
    filename = utils.abs_path_file(filename)
    groundtruths = {}
    i = 0
    with open(filename, "r") as filep:
        for index, line in enumerate(filep):
            if index > 73:  # skip the 74-line header
                # each record spans three lines: name, (ignored), csv row ending with the tag
                if i == 0:
                    i += 1
                    name = line.split("/")[-1][:-1]
                elif i == 1:
                    i += 1
                elif i == 2:
                    i = 0
                    groundtruths[name] = line.split(",")[-1][:-1]
    return groundtruths
Example #17
def read_gts(gt_filen):
    """
    @brief      Parse ground truths file in a Python dictionary
    
    @param      gt_filen  The ground truths filename
    
    @return     Return a dictionary with key corresponding to songs' id and
                value corresponding to male or female tag
    """
    gt_filen = utils.abs_path_file(gt_filen)
    data = {}
    with open(gt_filen, "r") as filep:
        for line in filep:
            if "male," in line:  # this works for ,male, or ,female,
                row = line.split(",")
                data[row[0]] = row[3]
    return data
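The substring test deliberately catches both genders, because ",female," ends with "male," (hypothetical rows):

print("male," in "id1,artist,title,male,1")    # True
print("male," in "id2,artist,title,female,1")  # True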
Example #18
from statistics import stdev  # needed for the ± spread in the summary line

def read_data_1(stats_dir, filen):
    stats_dir = utils.abs_path_dir(stats_dir)
    filen = utils.abs_path_file(filen)
    data = []
    names = []
    with open(stats_dir + filen, "r") as filep:
        for line in filep:
            # Read file with lines like this:
            # GA,0.578947368421,0.631578947368,0.710526315789,0.722222222222
            # SVMBFF,0.631578947368,0.684210526316,0.815789473684,0.66666666
            # VQMM,0.736842105263,0.842105263158,0.842105263158,0.75,0.61111
            row = line[:-1].split(",")
            names.append(row[0])  # one name per row of data
            tmp = [float(value) for value in row[1:]]
            data.append(tmp)
            print(filen.split(".")[0].split("_")[1].title() + " for " + row[0] +
                  " \t= " + "{0:.3f}".format(sum(tmp) / len(tmp)) +
                  " ± " + "{0:.3f}".format(stdev(tmp)))
    return data, names  # return the parsed values, matching read_data_2
Example #19
def read_file_bayle(filename):
    """Description of read_file

    train/test example line:
    filename,feat1,feat2,...,featn,tag
    """
    filename = utils.abs_path_file(filename)
    filenames = []
    groundtruths = []
    features = []
    with open(filename, "r") as filep:
        for row in filep:
            line = row.split(",")
            filenames.append(line[0])
            features.append([float(i) for i in line[1:-1]])
            gt = line[-1].rstrip("\r\n")  # strip any trailing newline characters
            groundtruths.append(gt)
    return filenames, features, groundtruths
Example #20
def remove_silence(filen, verbose=False):
    utils.print_success("Removing silence")
    filen = utils.abs_path_file(filen)
    with open(filen, "r") as filep:
        for line in filep:
            print(line)
            # Step 1 Gather samples
            song_fn = line[:-1]
            voice_fn = song_fn.replace("nbv-ld", "sv")
            try:
                voice_samples, voice_fs = sf.read(voice_fn)
            except RuntimeError:
                utils.print_error("remove_silence: cannot read " + voice_fn)
            try:
                song_samples, song_fs = sf.read(song_fn)
            except RuntimeError:
                utils.print_error("remove_silence: cannot read " + song_fn)
            # rows (frames) with at least one non-zero sample in the voice track
            idxs = np.any(voice_samples != 0., axis=1)
            # keep the first channel of those rows, reshaped to a 1 x n row vector
            voice_samples_non_zero = voice_samples[idxs, 0]
            voice_samples_non_zero = voice_samples_non_zero.reshape(1, len(voice_samples_non_zero))
            song_samples_non_zero = song_samples[idxs, 0]
            song_samples_non_zero = song_samples_non_zero.reshape(1, len(song_samples_non_zero))

            # Step 2 Extract features
            dir_feat = "/media/sf_DATA/ISMIR2017/features/gender/"
            save_feat(
                voice_samples_non_zero, dir_feat + "sv_nonzero/" +
                voice_fn.split(os.sep)[-1] + ".mfcc")
            save_feat(
                song_samples_non_zero, dir_feat + "song_nonzero/" +
                song_fn.split(os.sep)[-1] + ".mfcc")
Example #21
def read_test_file(filename):
    """
    Read ONE test file with content like:

        feat1 feat2 ... featN
        feat1 feat2 ... featN
        ...
        feat1 feat2 ... featN

    """
    features = []
    filename = utils.abs_path_file(filename)
    with open(filename, "r") as filep:
        for line in filep:
            line = line.split(" ")
            line[-1] = line[-1][:-1]  # strip the trailing newline
            features.append([float(tmp_feat) for tmp_feat in line])
    return features
Example #22
def create_filelist(kara1k, dir_audio):
    """
    @brief      Creates the list of audio files to be analyzed.

    @param      kara1k     The kara1k csv filename
    @param      dir_audio  The directory containing the audio files

    @return     The list of audio file paths
    """
    utils.print_success("Creating file list to be analyzed")
    kara1k = utils.abs_path_file(kara1k)
    dir_audio = utils.abs_path_dir(dir_audio)
    filelist = []
    with open(kara1k, "r") as filep:
        next(filep)
        for line in filep:
            row = line.split(",")
            # keep rows whose columns 6-8 contain "1"; "male" in line matches
            # both ",male," and ",female," rows, so both kinds of tracks are kept
            if "1" in row[6] and "1" in row[7] and "1" in row[8] and "male" in line:
                filelist.append(dir_audio + row[1] + "_" + row[2] + "_" + row[0])
    return filelist
Example #23
def read_data_2(stats_dir, filen):
    stats_dir = utils.abs_path_dir(stats_dir)
    filen = utils.abs_path_file(stats_dir + filen)
    data = []
    names = []
    tmp = []
    with open(filen, "r") as filep:
        next(filep)  # skip the header
        for line in filep:
            row = line[:-1].split(";")
            if row[0] in names:
                # name already seen: extend the current group of values
                names.append(row[0])
                tmp.append(float(row[1]))
            else:
                # new name: flush the current group and start a new one
                if len(tmp) > 0:
                    data.append(tmp)
                    tmp = []
                names.append(row[0])
                tmp.append(float(row[1]))
    data.append(tmp)
    return data, names
Example #24
def add_groundtruth(feature_fn, groundtruth_fn, output_fn):
    """Description of add_groundtruth

    Write in output filename the groundtruth merged with corresponding features

    ..todo:: Error with old_tag not corresponding to filename...


    """
    utils.print_success("Adding groundtruth")
    feature_fn = utils.abs_path_file(feature_fn)
    groundtruth_fn = utils.abs_path_file(groundtruth_fn)
    if os.path.isfile(output_fn) and os.path.exists(output_fn):
        utils.print_warning("Overwriting existing output file: " +
                            utils.abs_path_file(output_fn))
    # Read the groundtruth file into memory
    tmp_gt = csv.reader(open(groundtruth_fn, "r"))
    groundtruths = {}
    for row in tmp_gt:
        groundtruths[row[0]] = row[1]
    tags = []
    output = open(output_fn, "w")
    with open(feature_fn, "r") as feat:
        line_num = 0
        tmp_line = ""
        for line in feat:
            line_num += 1
            if line_num > 74:
                if line[0] != "%":
                    # Alter feature line with correct tag
                    cur_line = line.split(",")
                    old_tag = cur_line[-1].split("_")[0]
                    if old_tag in groundtruths:
                        new_tag = groundtruths[old_tag]
                        output.write(tmp_line + ",".join(cur_line[:-1]) + "," +
                                     new_tag + "\n")
                        tmp_line = ""
                        tags.append(new_tag)
                    else:
                        # TODO
                        # File not in groundtruth
                        tmp_line = ""
                        # utils.print_warning("Error with " + old_tag)
                else:
                    tmp_line += line
            elif line_num == 2:
                output.write("@relation train_test.arff\n")
                # output.write("@relation MARSYAS_KEA\n")
            elif line_num == 71:
                # Alter line 71, which lists all the tags gathered along the way
                # TODO enhance
                output.write("@attribute output {i,s}\n")
            else:
                # Write header
                output.write(line)

    tags = list(set(tags))
    utils.print_warning("TODO Take in account diffents tags than " + str(tags))

    output.close()
    utils.print_success("Groundtruth added")
Example #25
def cross_validation(train_filename, n_folds, outfilename):

    filename = utils.abs_path_file(train_filename)
    features = []
    groundtruths = []
    with open(filename, "r") as filep:
        for line in filep:
            line = line.split(",")
            features.append([float(x) for x in line[1:-1]])
            groundtruths.append(line[-1][:-1])
    features = np.array(features)
    groundtruths = np.array(groundtruths)

    # Init: load previous results if the output file already exists
    try:
        with open(outfilename, "r") as filep:
            data = json.load(filep)
    except (IOError, ValueError):
        data = {}
    algo_name = "Method 1"
    data[algo_name] = {}
    data[algo_name]["uneven"] = {}
    data[algo_name]["balanced"] = {}
    for distribution in data[algo_name]:
        data[algo_name][distribution]["precision"] = {}
        data[algo_name][distribution]["recall"] = {}
        data[algo_name][distribution]["f1"] = {}
        for tmp in data[algo_name][distribution]:
            data[algo_name][distribution][tmp]["instru"] = []
            data[algo_name][distribution][tmp]["song"] = []

    skf = StratifiedKFold(n_splits=n_folds)
    for i in range(0, 10):
        utils.print_warning("TODO for i in range")
        song_precis = []
        song_recall = []
        song_fmeasu = []
        inst_precis = []
        inst_recall = []
        inst_fmeasu = []
        cur_fold = 0
        for train, test in skf.split(features, groundtruths):
            cur_fold += 1
            utils.print_success("Iteration " + str(i) + "\tFold " +
                                str(cur_fold))
            dataset = {}
            dataset["train_features"] = features[train]
            dataset["train_groundtruths"] = groundtruths[train]
            dataset["test_features"] = features[test]
            dataset["test_groundtruths"] = groundtruths[test]
            predictions = classify(data=dataset)

            # sklearn orders labels alphabetically: index 0 = "i", index 1 = "s"
            song_precis.append(
                precision_score(dataset["test_groundtruths"],
                                predictions,
                                average=None)[1])
            song_recall.append(
                recall_score(dataset["test_groundtruths"],
                             predictions,
                             average=None)[1])
            song_fmeasu.append(
                f1_score(dataset["test_groundtruths"],
                         predictions,
                         average=None)[1])
            inst_precis.append(
                precision_score(dataset["test_groundtruths"],
                                predictions,
                                average=None)[0])
            inst_recall.append(
                recall_score(dataset["test_groundtruths"],
                             predictions,
                             average=None)[0])
            inst_fmeasu.append(
                f1_score(dataset["test_groundtruths"],
                         predictions,
                         average=None)[0])

        song_precis = sum(song_precis) / float(len(song_precis))
        song_recall = sum(song_recall) / float(len(song_recall))
        song_fmeasu = sum(song_fmeasu) / float(len(song_fmeasu))
        inst_precis = sum(inst_precis) / float(len(inst_precis))
        inst_recall = sum(inst_recall) / float(len(inst_recall))
        inst_fmeasu = sum(inst_fmeasu) / float(len(inst_fmeasu))

        # Song
        data[algo_name]["balanced"]["precision"]["song"].append(song_precis)
        data[algo_name]["balanced"]["recall"]["song"].append(song_recall)
        data[algo_name]["balanced"]["f1"]["song"].append(song_fmeasu)
        # Instru
        data[algo_name]["balanced"]["precision"]["instru"].append(inst_precis)
        data[algo_name]["balanced"]["recall"]["instru"].append(inst_recall)
        data[algo_name]["balanced"]["f1"]["instru"].append(inst_fmeasu)

    with open(outfilename, "w") as outfile:
        json.dump(data, outfile, indent=2)
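A minimal StratifiedKFold sketch mirroring the fold loop above; the toy features and s/i labels are invented for illustration:

import numpy as np
from sklearn.model_selection import StratifiedKFold

features = np.random.rand(20, 3)
groundtruths = np.array(["s", "i"] * 10)
skf = StratifiedKFold(n_splits=5)
for train, test in skf.split(features, groundtruths):
    # each fold preserves the s/i ratio: 16 train rows, 4 test rows
    print(len(train), len(test))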
Example #26
def process_local_feat(indir, file_gts_track, outdir_local, out_feat_global, train):
    """Description of process_local_feat
    Add delta and double delta to MFCCs
    """
    
    utils.print_success("Processing local features")
    
    # Preprocess arg
    indir = utils.abs_path_dir(indir)
    file_gts_track = utils.abs_path_file(file_gts_track)
    filelist = os.listdir(indir)
    outdir_local = utils.abs_path_dir(outdir_local)

    track_gts = {}
    with open(file_gts_track, "r") as filep:
        for line in filep:
            line = line.split(",")
            if train:
                index = line[0]
            else:
                index = line[0] + ".wav.mfcc.csv"
            track_gts[index] = line[1][:-1]

    for index, filename in enumerate(filelist):
        utils.print_progress_start(str(index) + "/" + str(len(filelist)) + " " + filename)
        if filename in track_gts:
            mfccs = []
            groundtruths = []
            with open(indir + filename, "r") as filep:
                # skip the 5 header lines
                for _ in range(5):
                    next(filep)
                for line in filep:
                    line = line.split(",")
                    mfccs.append(str2arr(line[:-1]))
                    if train:
                        groundtruths.append(line[-1][:-1])
            mfccs = np.array(mfccs)
            delta_mfcc = librosa.feature.delta(mfccs)
            delta2_mfcc = librosa.feature.delta(mfccs, order=2)
            # Write local features in outdir_local
            with open(outdir_local + filename, "w") as filep:
                gt_to_write = ""
                if "i" in track_gts[filename]:
                    gt_to_write = ",i"
                elif "s" in track_gts[filename]:
                    # postpone frame groundtruth annotationa to another function later in the code
                    gt_to_write = ""
                else:
                    utils.print_warning("bayle.py line 231 local frame groundtruth undefined")
                if train:
                    for a, b, c, d in zip(mfccs, delta_mfcc, delta2_mfcc, groundtruths):
                        filep.write(arr2str(a) + "," + arr2str(b) + "," + arr2str(c) + "," + d + "\n")
                else:
                    for a, b, c in zip(mfccs, delta_mfcc, delta2_mfcc):
                        filep.write(arr2str(a) + "," + arr2str(b) + "," + arr2str(c) + gt_to_write + "\n")
            # # Write global features in out_feat_global
            # with open(out_feat_global, "a") as filep:
            #     filep.write(filename + "," +
            #         arr2str(np.mean(mfccs, axis=0)) + "," + 
            #         arr2str(np.mean(delta_mfcc, axis=0)) + "," + 
            #         arr2str(np.mean(delta2_mfcc, axis=0)) + "," + 
            #         track_gts[filename] + "\n")
    utils.print_progress_end()
    utils.print_success("Adding local groundtruths to Songs in Jamendo thanks to Ramona annotations")
    match_feat_with_song_gt(dir_feat=outdir_local, dir_gts="groundtruths/frame_annot_jamendo_ramona/")
    utils.print_success("Done")
Example #27
def classify(train=None,
             test=None,
             data=None,
             res_dir="res/",
             disp=True,
             outfilename=None):
    """Compare multiple classifiers; write per-tag and global scores and
    return the predictions of the last one fitted."""
    utils.print_success("Comparison of different classifiers")
    if data is not None:
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    classifiers = {
        "RandomForest": RandomForestClassifier()
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        # "KNeighbors":KNeighborsClassifier(3),
        # "GaussianProcess":GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree":DecisionTreeClassifier(max_depth=5),
        # "MLP":MLPClassifier(),
        # "AdaBoost":AdaBoostClassifier(),
        # "GaussianNB":GaussianNB(),
        # "QDA":QuadraticDiscriminantAnalysis(),
        # "SVM":SVC(kernel="linear", C=0.025),
        # "GradientBoosting":GradientBoostingClassifier(),
        # "ExtraTrees":ExtraTreesClassifier(),
        # "LogisticRegression":LogisticRegression(),
        # "LinearDiscriminantAnalysis":LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)

        if outfilename is not None:
            with open(outfilename, "w") as filep:
                for gt, pred in zip(test_groundtruths, predictions):
                    filep.write(gt + "," + pred + "\n")

        # Global
        data = [key]
        data.append(
            str(
                precision_score(test_groundtruths,
                                predictions,
                                average='weighted')))
        data.append(
            str(
                recall_score(test_groundtruths,
                             predictions,
                             average='weighted')))
        data.append(
            str(f1_score(test_groundtruths, predictions, average='weighted')))
        data = ",".join(data)
        if disp:
            print(data)
        else:
            with open(res_dir + "global.csv", "a") as filep:
                filep.write(data + ",\n")
        # Local
        for index, tag in enumerate(list(set(train_groundtruths))):
            precision = precision_score(test_groundtruths,
                                        predictions,
                                        average=None)
            recall = recall_score(test_groundtruths, predictions, average=None)
            f1 = f1_score(test_groundtruths, predictions, average=None)
            line = key + "," + str(precision[index]) + "," + str(
                recall[index]) + "," + str(f1[index])
            if disp:
                print(line)
            else:
                with open(res_dir + "tag_" + tag + ".csv", "a") as filep:
                    filep.write(line + ",\n")
    return predictions
Example #28
def create_track_feat_testset(folder, infile, outfile, model_file, train=False):
    """Description of create_track_feat_testset
    Need to read each test file
    compute deltas on mfcc in the ram
    predict and predict_proba 
    generate song and instru ngrams and histograms
    Add the mean of mfcc+deltas
    append 109 features vector in feat_track/feat_test.csv
    """

    utils.print_success("Create track feat testset")
    folder = utils.abs_path_dir(folder)
    infile = utils.abs_path_file(infile)
    clf = joblib.load(model_file)
    track_gts = read_gts(infile, separator=",")
    for index, filename in enumerate(track_gts):
        utils.print_progress_start(str(index+1) + "/" + str(len(track_gts)) + " " + filename)
        mfccs = []
        extension = ""
        if train:
            extension = ""
        else:
            extension += "_audio_full_mono_22k"
        extension += ".wav.mfcc.csv"
        with open(folder + filename + extension, "r") as filep:
            if train:
                # train files start with 5 header lines
                for _ in range(5):
                    next(filep)
            for line in filep:
                if train:
                    line = line.split(",")
                else:
                    line = line.split(" ")
                mfccs.append(str2arr(line[:-1]))
        mfccs = np.array(mfccs)
        delta_mfcc = librosa.feature.delta(mfccs)
        delta2_mfcc = librosa.feature.delta(mfccs, order=2)
        tmp = np.append(mfccs, delta_mfcc, axis=1)
        features = np.append(tmp, delta2_mfcc, axis=1)
        preds_proba = clf.predict_proba(features)

        # Histogram of the per-frame probabilities, normalized over 10 bins
        nb_hist_class = 10
        numbers = column(preds_proba, 0)
        hist_pred = np.histogram(numbers, nb_hist_class)
        hist_pred_norm = hist_pred[0] / float(sum(hist_pred[0]))

        ngram_threshold = 0.5
        song_ngram_proba = ngram_proba(local_pred=numbers, threshold=ngram_threshold, above_threshold=True)
        instru_ngram_proba = ngram_proba(local_pred=numbers, threshold=ngram_threshold, above_threshold=False)
        
        preds = clf.predict(features)
        song_ngram = ngram(preds, "s")
        instru_ngram = ngram(preds, "i")

        with open(outfile, "a") as filep:
            filep.write(filename[:12] + "," +
                arr2str(np.mean(mfccs, axis=0)) + "," + 
                arr2str(np.mean(delta_mfcc, axis=0)) + "," + 
                arr2str(np.mean(delta2_mfcc, axis=0)) + "," + 
                arr2str(hist_pred_norm) + "," +
                song_ngram_proba + "," + 
                instru_ngram_proba + "," +
                song_ngram + "," + 
                instru_ngram + "," +
                track_gts[filename] + "\n")
    utils.print_progress_end()
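A sketch of the histogram feature built above: bucket the per-frame P(song) values into 10 bins and normalize; the random probabilities stand in for predict_proba output:

import numpy as np

preds_proba = np.random.rand(1000)       # per-frame probability of class "s"
hist, _ = np.histogram(preds_proba, 10)  # counts over 10 equal-width bins
hist_norm = hist / float(hist.sum())     # 10 features summing to 1
print(hist_norm)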
Example #29
def cross_validation(train_filename, n_folds, outfilename):
    utils.print_success("Cross validation")
    filename = utils.abs_path_file(train_filename)

    # condition name = the file basename without extension (assumes no extra "." in the path)
    condition = train_filename.split(".")[0].split(os.sep)[-1]

    features = []
    groundtruths = []
    with open(filename, "r") as filep:
        for line in filep:
            line = line[:-1].split(",")
            features.append([float(x) for x in line[0:-1]])
            groundtruths.append(line[-1])
    features = np.array(features)
    groundtruths = np.array(groundtruths)

    skf = StratifiedKFold(n_splits=n_folds)
    # for i in range(0, 10):
    i = 0
    cur_fold = 0

    # One "condition;score" line per fold is appended to each results file
    # inside the fold loop below.
    for train, test in skf.split(features, groundtruths):
        cur_fold += 1
        utils.print_success("Iteration " + str(i) + "\tFold " + str(cur_fold))
        dataset = {}
        dataset["train_features"] = features[train]
        dataset["train_groundtruths"] = groundtruths[train]
        dataset["test_features"] = features[test]
        dataset["test_groundtruths"] = groundtruths[test]
        predictions = classify(data=dataset)

        print("\tPrecision weighted\t" + str(
            precision_score(
                dataset["test_groundtruths"], predictions, average='weighted'))
              )
        print("\tRecall weighted\t" + str(
            recall_score(
                dataset["test_groundtruths"], predictions, average='weighted'))
              )
        print("\tF1 weighted\t" + str(
            f1_score(
                dataset["test_groundtruths"], predictions, average='weighted'))
              )
        print("\tAccuracy\t" +
              str(accuracy_score(dataset["test_groundtruths"], predictions)))
        with open("../results/gender/precision.txt", "a") as filep:
            filep.write(
                str(
                    precision_score(dataset["test_groundtruths"],
                                    predictions,
                                    average='weighted')) + "\n")
        with open("../results/gender/recall.txt", "a") as filep:
            filep.write(
                str(
                    recall_score(dataset["test_groundtruths"],
                                 predictions,
                                 average='weighted')) + "\n")
        with open("../results/gender/f1.txt", "a") as filep:
            filep.write(
                str(
                    f1_score(dataset["test_groundtruths"],
                             predictions,
                             average='weighted')) + "\n")
        with open("../results/gender/accuracy.txt", "a") as filep:
            filep.write(
                str(accuracy_score(dataset["test_groundtruths"], predictions))
                + "\n")