Example #1
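# Both examples appear to be excerpted from a larger module. The imports below
# are a hedged guess at what the snippets rely on, based only on the names they
# use (a recent scikit-learn layout is assumed). The project-specific aliases
# fex, MLvl, pT and myML are not reconstructed here, since their import paths
# are not shown in the snippets.
import os
from collections import Counter

import numpy as np
import sklearn.base as skb
import sklearn.metrics as mt
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
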
def getScoresFromWav(wavF, annF, feExFun, clf, lt, labelsHierarchy):
    """
    extracts features and its labels (ground truth) from wavF and annF files
    and compares the clf predictions with the ground truth
    Parameters
    ----------
    wavF: str
    annF: str
    feExFun: callable
    clf : classifier
    le : label encoder (object)
    labelsHierarchy: list
    """

    A, a_names = fex.getXy_fromWavFAnnF(wavF,
                                        annF,
                                        feExFun,
                                        labelsHierarchy,
                                        filter_classes=lt.classes_)
    a = lt.nom2num(a_names)
    return clfScoresO(clf, A, a)
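
# A hedged usage sketch for getScoresFromWav. Every file name and label below is
# hypothetical, and feExFun, clf and the label transformer lt are assumed to be
# built elsewhere by the project (e.g. feExFun from fex.makeTransformationsPipeline):
#
#     scoresO = getScoresFromWav("recording.wav", "recording_ann.txt",
#                                feExFun, clf, lt,
#                                labelsHierarchy=["c"])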
Example #2
def runExperiment(
    train_coll,
    test_coll,
    lt,
    T_settings,
    labsHierarchy,
    out_fN,
    cv,
    pipe_estimators,
    gs_grid,
    scoring=None,
    param=None,
    predictionsDir=None,
):
    """Runs clf experiments
    Parameters
    ----------
        train_coll: list
        test_coll: list
        lt: ML.labelTransformer
        T_settings: list of tuples
        labelsHierachy: list of strings
        cv: cv folds
        pipe_estimators: list
            for pipline
        gs_grid: list

        out_fN: str
        returnClfs: dict, Flase => clfs are not stored
        predictionsDir: str
        scoring: string or sklearn.metrics.scorer
    """

    Tpipe = fex.makeTransformationsPipeline(T_settings)
    feExFun = Tpipe.fun
    #### prepare DATA: collections --> X y
    ## compute features
    dataO = fex.wavAnnCollection2datXy(train_coll, feExFun, labsHierarchy)
    ## prepare X y data
    X0, y0_names = dataO.filterInstances(lt.classes_)  # filter for clf_labs
    # optionally balance classes: myML.balanceToClass(X0, y0_names, 'c')
    X, y_names = X0, y0_names
    y = lt.nom2num(y_names)
    labsD = lt.targetNumNomDict()
    # NOTE: collFi_train (training collection file name) is not defined in this
    # function; it is assumed to come from the enclosing module.
    with open(out_fN, "a") as out_file:  # print details about the dataset into status file
        out_file.write("# {} ({})\n".format(collFi_train, len(train_coll)))
        out_file.write("#label_transformer {} {}\t data {}\n".format(
            lt.targetNumNomDict(), lt.classes_, Counter(y_names)))

    # NOTE: testFrac (test-set fraction) is likewise assumed to be defined in the enclosing module
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=testFrac,
                                                        random_state=0)

    with open(out_fN, "a") as out_file:  # print details to status file
        out_file.write("#TRAIN, shape {}\n".format(np.shape(X_train)))
        out_file.write("#TEST, shape {}\n".format(np.shape(X_test)))

    #### CLF
    pipe = Pipeline(pipe_estimators)
    gs = GridSearchCV(estimator=pipe,
                      param_grid=gs_grid,
                      scoring=scoring,
                      cv=cv,
                      n_jobs=-1)

    gs = gs.fit(X_train, y_train)
    ## best clf scores
    with open(out_fN, "a") as out_file:
        out_file.write("#CLF\t{}\tbest score {:.3f}\n".format(
            str(gs.best_params_).replace("\n", ""), gs.best_score_))
    clf_best = gs.best_estimator_

    ## clf scores over test set
    with open(out_fN, "a") as out_file:
        ### cv score
        cv_sc = cross_val_score(clf_best, X_test, y_test, scoring=scoring)
        out_file.write("{:2.2f}, {:2.2f}, {:.2f}, ".format(
            param, 100 * np.mean(cv_sc), 100 * 2 * np.std(cv_sc)))
        ### cv accuracy
        cv_acc = cross_val_score(clf_best, X_test, y_test)
        out_file.write("{:2.2f}, {:.2f}, ".format(100 * np.mean(cv_acc),
                                                  100 * 2 * np.std(cv_acc)))
    ## print R, P and f1 for each class
    y_true, y_pred = y_test, clf_best.predict(X_test)
    MLvl.print_precision_recall_fscore_support(y_true, y_pred, out_fN)

    #### TEST collection
    ### train classifier with whole dataset
    clf = skb.clone(gs.best_estimator_)  # clone to create a new classifier with the same parameters
    clf.fit(X, y)
    ### print scores
    callIx = lt.nom2num("c")  # numeric index of the call class "c"
    for wavF, annF in test_coll[:]:
        A, a_names = fex.getXy_fromWavFAnnF(wavF,
                                            annF,
                                            feExFun,
                                            labsHierarchy,
                                            filter_classes=lt.classes_)
        a_true = lt.nom2num(a_names)
        a_pred = clf.predict(A)
        P = mt.precision_score(a_true, a_pred, average=None)[callIx]
        R = mt.recall_score(a_true, a_pred, average=None)[callIx]
        f1 = mt.f1_score(a_true, a_pred, average=None)[callIx]
        with open(out_fN, "a") as out_file:
            out_file.write(", {:2.2f}, {:2.2f}, {:2.2f}".format(
                100 * f1, 100 * P, 100 * R))
        if predictionsDir:
            bN = os.path.basename(annF)
            annFile_predict = os.path.join(
                predictionsDir, "{}_{}_{}".format(int(f1 * 100), int(param),
                                                  bN))
            pT.predictSoundSections(wavF,
                                    clf,
                                    lt,
                                    feExFun,
                                    annSections=labsHierarchy,
                                    outF=annFile_predict)

    with open(out_fN, "a") as out_file:
        out_file.write("\n")

    return clf
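
# runExperiment wires the project's feature extraction into a standard
# scikit-learn Pipeline + GridSearchCV workflow. Below is a self-contained
# sketch of that generic pattern on synthetic data; the estimators, grid and
# scoring are illustrative assumptions, not the ones used by the project.
import numpy as np
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X = rng.randn(200, 10)                       # stand-in for the extracted features
y = rng.randint(0, 2, size=200)              # stand-in for the numeric labels

pipe_estimators = [("scaler", StandardScaler()), ("clf", SVC())]
gs_grid = [{"clf__C": [0.1, 1.0, 10.0], "clf__gamma": [0.01, 0.1]}]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

gs = GridSearchCV(Pipeline(pipe_estimators), param_grid=gs_grid,
                  scoring="f1", cv=5, n_jobs=-1)
gs.fit(X_train, y_train)
print(gs.best_params_, gs.best_score_)
print(cross_val_score(gs.best_estimator_, X_test, y_test, scoring="f1"))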