def run():
    """
    Runs the classification program using input values from the command line.
    """
    # Clear previous streams
    Main.clear_streams()

    # Persist classifier if it does not exist
    if len(glob.glob("saved_classifiers/clf_mfcc.h5")) < 1:
        print("Saving model...")
        clf_mfcc, scaler_mfcc = MFCC.train_mfcc_nn(util.data_path + "speech",
                                                   util.data_path + "music",
                                                   20000)
        clf_mfcc.save('saved_classifiers/clf_mfcc.h5')
        # Persist scaler
        joblib.dump(scaler_mfcc, "saved_classifiers/scaler_mfcc.joblib")
    else:
        print("Restoring models...")
        clf_mfcc = tf.keras.models.load_model('saved_classifiers/clf_mfcc.h5')
        scaler_mfcc = joblib.load("saved_classifiers/scaler_mfcc.joblib")

    if live_stream:
        Main.calc_from_stream(station, clf_mfcc, scaler_mfcc, is_mfcc, is_cfa,
                              listening_preference, replacement_path)
    else:
        filename = "stream_long"
        radiorec.record(station, 10, filename)
        file = "data/test/" + filename + ".mp3"
        Main.calc_from_file(file, filename, clf_mfcc, scaler_mfcc, is_mfcc, is_cfa)
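# `run()` reads `live_stream`, `station`, `is_mfcc`, `is_cfa`,
# `listening_preference` and `replacement_path` from module scope. How they are
# populated is not shown in this excerpt; a minimal argparse sketch, with all
# flag names and defaults assumed for illustration:
import argparse

parser = argparse.ArgumentParser(description="Speech/music classification")
parser.add_argument("--live", dest="live_stream", action="store_true",
                    help="Classify a live radio stream instead of a file")
parser.add_argument("--station", default="fm4",
                    help="Station key from radiorec_settings.ini")
parser.add_argument("--mfcc", dest="is_mfcc", action="store_true",
                    help="Use the MFCC feature")
parser.add_argument("--cfa", dest="is_cfa", action="store_true",
                    help="Use the CFA feature")
parser.add_argument("--keep", dest="listening_preference",
                    choices=["music", "speech"], default="music",
                    help="Which class should keep playing")
parser.add_argument("--replacement", dest="replacement_path",
                    default="data/replacement.mp3",
                    help="Audio played while the unwanted class is detected")

args = parser.parse_args()
live_stream = args.live_stream
station = args.station
is_mfcc = args.is_mfcc
is_cfa = args.is_cfa
listening_preference = args.listening_preference
replacement_path = args.replacement_path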
def evaluate_mfcc(x_tst, y_true, clf, scaler, iteration):
    """
    Evaluates the trained classifier with the given data. Prints and stores confusion matrices.
    :param x_tst: The test data
    :param y_true: The true test labels
    :param clf: The classifier
    :param scaler: The scaler
    :param iteration: The current iteration
    """
    print("Evaluating MFCC feature...")
    y_mfccs = []
    for file in tqdm(x_tst):
        # MFCC classification
        mfcc = MFCC.read_mfcc_from_file(file)
        result = MFCC.predict_nn(clf, scaler, mfcc)

        # The fraction of frames classified as music decides the file-level label
        ones = np.count_nonzero(result)
        fraction = ones / len(result)
        if fraction < 0.5:
            y_mfccs.append(0)
        else:
            y_mfccs.append(1)

    print("Evaluation for MFCC feature:")
    y_pred = y_mfccs
    report = sklearn.metrics.classification_report(y_true, y_pred,
                                                   labels=labels,
                                                   target_names=target_names)
    confusion_matrix = sklearn.metrics.confusion_matrix(y_true, y_pred,
                                                        labels=labels)
    print("Report")
    print(report)
    print("Confusion Matrix")
    print(confusion_matrix)
    pretty_print_cm(confusion_matrix, "MFCC", str(iteration))
    print("----------------------------------------------------------------- \n\n")
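# `pretty_print_cm` is called here and in the k-fold evaluation below but is
# not defined in this excerpt. A minimal sketch, assuming it renders the 2x2
# confusion matrix with matplotlib and saves it under a name derived from the
# feature and iteration (file name, layout and class names are assumptions):
import matplotlib.pyplot as plt

def pretty_print_cm(cm, feature_name, iteration):
    """Render a labeled confusion matrix and save it as a PNG (sketch)."""
    class_names = ["Speech", "Music"]
    fig, ax = plt.subplots()
    im = ax.imshow(cm, cmap="Blues")
    ax.set_xticks(range(len(class_names)))
    ax.set_yticks(range(len(class_names)))
    ax.set_xticklabels(class_names)
    ax.set_yticklabels(class_names)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    # Annotate each cell with its count
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            ax.text(col, row, str(cm[row, col]), ha="center", va="center")
    fig.colorbar(im)
    fig.savefig("plots/cm_" + feature_name + "_" + iteration + ".png")
    plt.close(fig)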
def print_mfcc(mfcc, clf, scaler_mfcc, result_mfcc, splits=9):
    """
    Splits the incoming MFCC data into `splits` blocks (9 by default), makes a
    prediction per block and prints the averaged result.
    :param mfcc: The incoming MFCC feature vectors
    :param clf: The trained MFCC classifier
    :param scaler_mfcc: The scaler instance (from training)
    :param result_mfcc: List to which the result is written
    :param splits: How many splits should be performed on the feature vectors
    """
    split = np.array_split(mfcc, splits)
    total = 0
    for i in range(splits):
        result = MFCC.predict_nn(clf, scaler_mfcc, split[i])
        ones = np.count_nonzero(result)
        total += ones / len(result)

    result = round(total / splits, 4)
    result_mfcc[0] = result
    print("MFCC Music: ", str(result))
import glob

import joblib
from sklearn.linear_model import LogisticRegression, Perceptron, SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

import util
from features import MFCC

# Load dataset (reuse cached MFCCs if they were already extracted)
if len(glob.glob(util.data_path + "mfcc_trn_gtzan.joblib")) < 1:
    X, Y = MFCC.calculate_mfccs(util.data_path + "speech/gtzan",
                                util.data_path + "music/gtzan",
                                max_duration=1800)
    joblib.dump(X, util.data_path + "mfcc_trn_gtzan.joblib")
    joblib.dump(Y, util.data_path + "mfcc_lbls_gtzan.joblib")
else:
    X = joblib.load(util.data_path + "mfcc_trn_gtzan.joblib")
    Y = joblib.load(util.data_path + "mfcc_lbls_gtzan.joblib")

# Prepare configuration for cross validation
seed = 7

# Prepare models
models = [('LR', LogisticRegression()),
          ('SGD', SGDClassifier()),
          ('LDA', LinearDiscriminantAnalysis()),
          ('CART', DecisionTreeClassifier()),
          ('NB', GaussianNB()),
          ('PC', Perceptron()),
          ('MLP', MLPClassifier())]
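# The snippet above sets up `seed` and a model list, but the comparison loop is
# not part of this excerpt. A minimal sketch of the usual pattern, evaluating
# each model with 10-fold cross validation (fold count and scoring metric are
# assumptions):
from sklearn import model_selection

results = []
names = []
for name, model in models:
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X, Y, cv=kfold,
                                                 scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    print("%s: %f (%f)" % (name, cv_results.mean(), cv_results.std()))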
def calc_from_stream(station, clf_mfcc, scaler_mfcc, is_mfcc, is_cfa,
                     listening_preference, replacement_path):
    """
    Streams 0.5 seconds of audio at a time and classifies the data.
    :param station: The radio station from which the data should be streamed.
                    See "radiorec_settings.ini" for available stations.
    :param clf_mfcc: The trained neural network classifier for MFCC classification
    :param scaler_mfcc: The scaler instance used to scale the original MFCC training data
    :param is_mfcc: Whether the MFCC value should be calculated and taken into
                    consideration for the final result
    :param is_cfa: Whether the CFA value should be calculated and taken into
                   consideration for the final result
    :param listening_preference: Specifies whether spoken segments or music
                                 segments should be kept
    :param replacement_path: The path to the audio file that is played when the
                             unwanted class is detected
    :return: void
    """
    mixer.init(frequency=16000, channels=1)

    succ_speech = 0
    succ_music = 0

    # Flag for checking if the replacement track is currently playing
    is_replacement = False

    i = 0
    while True:
        results = {}
        current_file = "stream_" + str(i)
        radiorec.record(station, 0.5, current_file)
        path = "data/test/" + current_file + ".mp3"
        wav_path = ac.mp3_to_16_khz_wav(path)
        print("Current: " + current_file)

        # Preprocess audio
        sig, rate, frequencies, times, spectrogram = Processing.preprocessing(wav_path)

        # Take time
        start = time.time()

        # Use features specified in command line arguments
        if is_mfcc:
            # MFCC classification runs in its own thread; pass the callable and
            # its arguments to the Thread instead of calling it in place
            current_mfcc = MFCC.read_mfcc(sig, rate)
            result_mfcc = [-1]
            thread_mfcc = threading.Thread(target=Output.print_mfcc,
                                           args=(current_mfcc, clf_mfcc,
                                                 scaler_mfcc, result_mfcc, 9))

        if is_cfa:
            # CFA classification
            cfa, peakis = CFA.calculate_cfa(spec=spectrogram, threshold=cfa_threshold)
            result = round(cfa, 4)
            results["cfa"] = result
            print("CFA Music: " + str(result))

        if is_mfcc:
            thread_mfcc.start()
            thread_mfcc.join()
            results["mfcc"] = result_mfcc[0]

        # Make a decision and add to blocks
        final_result = decide(results)

        # Add to successive blocks
        if final_result > 0.5:
            succ_music += 1
            succ_speech = 0
        else:
            succ_speech += 1
            succ_music = 0

        result_str = "SPEECH" if final_result <= 0.5 else "MUSIC"
        print("FINAL RESULT: ", final_result, " => " + result_str)
        print("Successive music blocks: ", succ_music)
        print("Successive speech blocks: ", succ_speech)

        # Fade out the track if the currently played type does not correspond to
        # what was specified via the command line. 4 blocks provide a good user
        # experience because single or double blocks are sometimes misclassified.
        if listening_preference == "music":
            if succ_speech > 4 and not is_replacement:
                mixer.music.fadeout(300)
                mixer.music.load(replacement_path)
                mixer.music.play()
                is_replacement = True
            if succ_music > 4 and is_replacement:
                is_replacement = False
                mixer.music.fadeout(300)

        if listening_preference == "speech":
            if succ_music > 4 and not is_replacement:
                mixer.music.fadeout(300)
                mixer.music.load(replacement_path)
                mixer.music.play()
                is_replacement = True
            if succ_speech > 4 and is_replacement:
                is_replacement = False
                mixer.music.fadeout(300)

        if not is_replacement:
            # Play audio stream
            mixer.music.load(wav_path)
            mixer.music.play()

        i += 1

        # Measure execution time
        end = time.time()
        print("Elapsed Time: ", str(end - start))
        print()
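# `decide` combines the per-feature scores into a single music probability but
# is not defined in this excerpt. A minimal sketch, assuming it simply averages
# whichever feature scores are present in the dict:
def decide(results):
    """Average the available feature scores into a final music score (sketch)."""
    if not results:
        return -1  # No feature was enabled
    return sum(results.values()) / len(results)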
def calc_from_file(file, filename, clf_mfcc, scaler_mfcc, is_mfcc, is_cfa):
    """
    Classifies an mp3 audio file and saves the results in a CSV and a PNG file.
    See the "plots" folder.
    :param file: The file to classify
    :param filename: The filename, used to name the CSV and PNG files
    :param clf_mfcc: The trained neural network classifier for MFCC classification
    :param scaler_mfcc: The scaler instance used to scale the original MFCC training data
    :param is_mfcc: Whether the MFCC value should be calculated and taken into
                    consideration for the final result
    :param is_cfa: Whether the CFA value should be calculated and taken into
                   consideration for the final result
    :return: An array covering the duration of the file in steps of 0.5s, and
             the generated speech_music_map
    """
    speech_music_map = []
    succ_speech = 0
    succ_music = 0

    path = file
    wav_path = ac.mp3_to_16_khz_wav(path)

    # Preprocess audio
    sig, rate, frequencies, times, spectrogram = Processing.preprocessing(wav_path)
    half_seconds = math.ceil(util.get_wav_duration(wav_path) * 2)

    mixer.init(frequency=16000, channels=1)
    mixer.music.load(wav_path)
    mixer.music.play()

    time_per_iteration = 0
    for i in range(half_seconds):
        # Take time
        start = time.time()

        results = {}
        print("Current: ")

        # Use features specified in command line arguments
        if is_mfcc:
            # MFCC classification on the current half-second slice of the signal
            startidx = math.floor(len(sig) * i / half_seconds)
            endidx = math.ceil(len(sig) * (i + 1) / half_seconds)
            current_mfcc = MFCC.read_mfcc(sig[startidx:endidx], rate)
            result_mfcc = [-1]
            thread_mfcc = threading.Thread(target=Output.print_mfcc,
                                           args=(current_mfcc, clf_mfcc,
                                                 scaler_mfcc, result_mfcc, 9))

        if is_cfa:
            startidx = math.floor(spectrogram.shape[1] * i / half_seconds)
            endidx = math.ceil(spectrogram.shape[1] * (i + 1) / half_seconds)

            # CFA classification
            cfa, peakis = CFA.calculate_cfa(spec=spectrogram[:, startidx:endidx],
                                            threshold=cfa_threshold)
            result = round(cfa, 4)
            results["cfa"] = result
            print("CFA Music: " + str(result))

        if is_mfcc:
            thread_mfcc.start()
            thread_mfcc.join()
            results["mfcc"] = result_mfcc[0]

        # Make a decision and add to blocks
        final_result = decide(results)

        # Add to successive blocks
        if final_result > 0.5:
            succ_music += 1
            succ_speech = 0
            speech_music_map.append(1)
        else:
            succ_speech += 1
            succ_music = 0
            speech_music_map.append(0)

        result_str = "SPEECH" if final_result <= 0.5 else "MUSIC"
        print("FINAL RESULT: ", final_result, " => " + result_str)
        print("Successive music blocks: ", succ_music)
        print("Successive speech blocks: ", succ_speech)

        # Measure execution time. The first iteration is skipped because it
        # takes longer while numba caches all the functions.
        end = time.time()
        elapsed = end - start
        time_per_iteration += elapsed if i > 0 else 0
        print("Elapsed Time: ", str(elapsed))
        print()

    # Save CSV and PNG of the sequence of classified data (speech_music_map)
    x = np.arange(len(speech_music_map)) / 2  # Convert from 0.5s blocks to seconds
    util.plot_speech_music_map(filename, x, speech_music_map, save_csv=True)

    print("Average time per iteration: ",
          str(time_per_iteration / max(half_seconds - 1, 1)))
    return x, speech_music_map
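# `util.plot_speech_music_map` is called above but not shown in this excerpt.
# A minimal sketch, assuming it draws the 0/1 sequence as a step plot and
# optionally writes a CSV next to the PNG (paths and figure layout are
# assumptions):
import csv
import matplotlib.pyplot as plt

def plot_speech_music_map(filename, x, speech_music_map, save_csv=False):
    """Plot speech (0) vs. music (1) over time and persist the result (sketch)."""
    fig, ax = plt.subplots(figsize=(12, 2))
    ax.step(x, speech_music_map, where="post")
    ax.set_xlabel("Time (s)")
    ax.set_yticks([0, 1])
    ax.set_yticklabels(["Speech", "Music"])
    fig.savefig("plots/" + filename + ".png", bbox_inches="tight")
    plt.close(fig)
    if save_csv:
        with open("plots/" + filename + ".csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(zip(x, speech_music_map))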
def evaluate_mfcc_kfold(path_speech, path_music, max_duration, k):
    """
    Trains and evaluates a Keras neural network with k-fold cross validation.
    :param path_speech: The path to the speech data
    :param path_music: The path to the music data
    :param max_duration: The total duration of files that should be selected of
                         each class. For example, 5000 would train the network
                         with 5000 minutes of speech files and 5000 minutes of
                         music files
    :param k: The number of splits on the training data
    :return: None. Prints a report and confusion matrix per fold and the mean
             accuracy over all folds.
    """
    # Use existing training data (= extracted MFCCs) to skip the process of
    # recalculating the MFCCs each time
    if len(glob.glob(util.data_path + "mfcc_trn_kfold.joblib")) < 1:
        trn, lbls = MFCC.calculate_mfccs(path_speech, path_music, max_duration)
        joblib.dump(trn, util.data_path + "mfcc_trn_kfold.joblib")
        joblib.dump(lbls, util.data_path + "mfcc_lbls_kfold.joblib")
    else:
        trn = joblib.load(util.data_path + "mfcc_trn_kfold.joblib")
        lbls = joblib.load(util.data_path + "mfcc_lbls_kfold.joblib")

    # Initialize
    kfold = sklearn.model_selection.KFold(n_splits=k, shuffle=True)

    # The accuracy of each iteration is stored in this list
    accuracies = []

    for fold, (train_index, test_index) in enumerate(kfold.split(trn)):
        print("TRAIN:", train_index, "TEST:", test_index)

        # Take split from the total training data
        x_train, x_test = trn[train_index], trn[test_index]
        y_train, y_test = lbls[train_index], lbls[test_index]

        # Scale data
        scaler = sklearn.preprocessing.StandardScaler()
        x_train = scaler.fit_transform(x_train)

        # Classifier fitting: Keras nn
        clf = tf.keras.models.Sequential([
            tf.keras.layers.Flatten(input_shape=(1, 26)),
            tf.keras.layers.Dense(16, activation=tf.nn.relu),
            tf.keras.layers.Dense(8, activation=tf.nn.relu),
            tf.keras.layers.Dense(2, activation=tf.nn.softmax)
        ])
        clf.compile(optimizer='adam',
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])

        # Data reshaping required for the network
        x_train = x_train.reshape((x_train.shape[0], 1, x_train.shape[1]))

        # Fit data
        clf.fit(x_train, y_train, epochs=5)

        # Run on test set
        correct = 0
        incorrect = 0
        y_pred = []
        for i in tqdm(range(len(x_test))):
            result = MFCC.predict_nn(clf, scaler, x_test[i].reshape(1, -1))
            ones = np.count_nonzero(result)
            result = ones / len(result)
            if result >= 0.5:
                y_pred.append(1)
                if y_test[i] == 1:
                    correct += 1
                else:
                    incorrect += 1
            else:
                y_pred.append(0)
                if y_test[i] == 0:
                    correct += 1
                else:
                    incorrect += 1

        accuracy = correct / (correct + incorrect)
        accuracies.append(accuracy)
        print("Results for test set: ", str(round(accuracy * 100, 2)) + "%")

        y_pred = np.array(y_pred)
        report = sklearn.metrics.classification_report(y_test, y_pred,
                                                       labels=labels,
                                                       target_names=target_names)
        confusion_matrix = sklearn.metrics.confusion_matrix(y_test, y_pred,
                                                            labels=labels)
        print("Report")
        print(report)
        print("Confusion Matrix")
        print(confusion_matrix)
        pretty_print_cm(confusion_matrix, "MFCC", str(fold))
        print("----------------------------------------------------------------- \n\n")

    print("Mean accuracy:", np.mean(np.array(accuracies)))
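# `MFCC.predict_nn` is used throughout but defined elsewhere in the repository.
# A minimal sketch, assuming it scales the frames with the training scaler,
# reshapes them to the (n, 1, 26) layout the network above expects, and returns
# one 0 (speech) / 1 (music) label per frame via argmax over the softmax outputs:
def predict_nn(clf, scaler, mfcc):
    """Return a per-frame 0/1 prediction for a batch of MFCC frames (sketch)."""
    scaled = scaler.transform(mfcc)
    scaled = scaled.reshape((scaled.shape[0], 1, scaled.shape[1]))
    probabilities = clf.predict(scaled)
    return np.argmax(probabilities, axis=1)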