Example No. 1
import os

import numpy as np
import pandas as pd
from pyAudioAnalysis import MidTermFeatures as aF


def extract_features(data_dir):
    m_win, m_step, s_win, s_step = 1, 1, 0.1, 0.05
    fs = []
    filess = []
    for root, dirs, _ in os.walk(data_dir):
        for d in dirs:
            print("PATH: " + os.path.join(root, d))
            try:
                # compute_beat=True appends beat and beat_conf to every
                # feature vector (but not to the returned feature names)
                f, files, fn = aF.directory_feature_extraction(
                    os.path.join(root, d), m_win, m_step, s_win, s_step,
                    compute_beat=True)
                fs.append(f)
                filess.extend(files)
            except Exception as e:
                print("Skipping {0}: {1}".format(os.path.join(root, d), e))
    fs = np.concatenate(fs, axis=0)
    # append the two beat column names so they match the feature matrix
    fn.append('beat')
    fn.append('beat_conf')

    frame = pd.DataFrame(fs, columns=fn)
    frame['audio_path'] = filess

    save_path = "./features.csv"
    frame.to_csv(save_path, index=False)
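A hypothetical invocation of the function above (the directory layout is an assumption: a root folder whose sub-directories each contain WAV files):

# "./audio" is a placeholder root directory; each sub-folder holds WAV files.
extract_features("./audio")  # writes ./features.csv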
Example No. 2
import os

from pyAudioAnalysis import MidTermFeatures as aF


def multiple_directory_feature_extraction(path_list,
                                          mid_window,
                                          mid_step,
                                          short_window,
                                          short_step,
                                          compute_beat=False):
    """
    Extract mid-term features from several directories (one per class) and
    return the full set of info: features, class_names, feature_names and
    file_names.
    """
    # feature extraction for each class:
    features = []
    class_names = []
    file_names = []
    for i, d in enumerate(path_list):
        f, fn, feature_names = \
            aF.directory_feature_extraction(d, mid_window, mid_step,
                                            short_window, short_step,
                                            compute_beat=compute_beat)
        if f.shape[0] > 0:
            # if at least one audio file has been found in the provided folder:
            features.append(f)
            file_names.append(fn)
            print(feature_names)
            if d[-1] == os.sep:
                class_names.append(d.split(os.sep)[-2])
            else:
                class_names.append(d.split(os.sep)[-1])
    return features, class_names, feature_names, file_names
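A minimal usage sketch for the function above; the two class folders are hypothetical paths:

# Hypothetical class folders; each contains the WAV files of one class.
feats, classes, feat_names, files = multiple_directory_feature_extraction(
    ["data/classical", "data/metal"], 1, 1, 0.1, 0.05)
# feats[i] is the (n_files x n_features) matrix of class classes[i],
# and files[i] lists the corresponding WAV paths.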
Example No. 3
import numpy as np
import plotly
import plotly.graph_objs as go
from pyAudioAnalysis import MidTermFeatures as aMF


def exp4():
    # AfeExp.data_folder points at the example data folder (defined in the
    # surrounding example class)
    print('pyAudioAnalysis example 4')
    dirs = [
        '{0}music/classical'.format(AfeExp.data_folder),
        '{0}music/metal'.format(AfeExp.data_folder)
    ]
    class_names = ['classical', 'metal']
    m_win, m_step, s_win, s_step = 1, 1, 0.1, 0.05
    features = []
    for d in dirs:  # get feature matrix for each directory (class)
        f, files, fn = aMF.directory_feature_extraction(
            d, m_win, m_step, s_win, s_step)
        features.append(f)
    print(features[0].shape, features[1].shape)
    f1 = np.array([
        features[0][:, fn.index('spectral_centroid_mean')],
        features[0][:, fn.index('energy_entropy_mean')]
    ])
    f2 = np.array([
        features[1][:, fn.index('spectral_centroid_mean')],
        features[1][:, fn.index('energy_entropy_mean')]
    ])
    plots = [
        go.Scatter(x=f1[0, :],
                   y=f1[1, :],
                   name=class_names[0],
                   mode='markers'),
        go.Scatter(x=f2[0, :],
                   y=f2[1, :],
                   name=class_names[1],
                   mode='markers')
    ]
    mylayout = go.Layout(xaxis=dict(title="spectral_centroid_mean"),
                         yaxis=dict(title="energy_entropy_mean"))
    # plotly.offline.iplot(go.Figure(data=plots, layout=mylayout))
    plotly.offline.plot({
        'data': plots,
        'layout': mylayout
    }, auto_open=True)
Example No. 4
import os
import ntpath

import librosa
import numpy as np
import pandas as pd
from pyAudioAnalysis import MidTermFeatures


# `path`, `progress` and `tkobj2` are expected as module-level globals
# (the chosen root directory, a ttk progress bar and the tk window).
def extract_features():
    # initialize feature data structures
    m3u_paths = {}
    librosa_feats = {}
    pyaudio_feats = {}
    feat_names = None

    # variables for pyAudioAnalysis feature extraction
    mid_term_window = 1
    mid_term_step = 1
    short_term_window = 0.05
    short_term_step = 0.05

    for root, dirs, files in os.walk(path, topdown=False):
        # extract features from the root directory chosen
        pyaudio_feat, song_files, feat_names = MidTermFeatures.directory_feature_extraction(
            root, mid_term_window, mid_term_step, short_term_window,
            short_term_step, False)

        # update the progress bar
        progress['value'] = 30
        tkobj2.update()

        for index, s in enumerate(song_files):
            # save features as a dictionary with song names as keys and
            # pyAudioAnalysis feature vectors as values
            s_dict_name = ntpath.basename(s)
            if pyaudio_feat.ndim == 1:
                # special case of indexing when there is only one song
                pyaudio_feats[s_dict_name] = pyaudio_feat
            else:
                pyaudio_feats[s_dict_name] = pyaudio_feat[index]

        for folder in dirs:
            # extract pyaudioanalysis features from each subfolder
            folder_path = os.path.join(root, folder)
            pyaudio_feat, song_files, feat_names = MidTermFeatures.directory_feature_extraction(
                folder_path, mid_term_window, mid_term_step, short_term_window,
                short_term_step, False)

            for index, s in enumerate(song_files):
                # save features as a dictionary with song names as keys and
                # pyAudioAnalysis feature vectors as values
                s_dict_name = ntpath.basename(s)
                if pyaudio_feat.ndim == 1:
                    # special case of indexing when there is only one song
                    pyaudio_feats[s_dict_name] = pyaudio_feat
                else:
                    pyaudio_feats[s_dict_name] = pyaudio_feat[index]

        # go through all mp3 files to extract librosa features
        for song in files:
            song_path = os.path.join(root, song)
            if song_path.endswith(".mp3"):
                # get the tempo of the song
                waveform, samp_rate = librosa.load(song_path)
                tempo, beat_frames = librosa.beat.beat_track(y=waveform,
                                                             sr=samp_rate)

                # get the chroma number of the song
                beat_times = librosa.frames_to_time(beat_frames, sr=samp_rate)
                y_harmonic, y_percussive = librosa.effects.hpss(waveform)
                chromagram = librosa.feature.chroma_cqt(y=y_harmonic,
                                                        sr=samp_rate)
                beat_chroma = librosa.util.sync(chromagram,
                                                beat_frames,
                                                aggregate=np.median)

                # calculate the diff of beat chroma to convert information into a single float
                chroma_df = pd.DataFrame(beat_chroma)
                diff_values = chroma_df.diff()
                diff_mean = diff_values.mean(axis=0, skipna=True)
                chroma_num = sum(diff_mean) / len(diff_mean)

                # save the librosa features and the full path of each mp3 song
                librosa_feats[song] = [tempo, chroma_num]
                m3u_paths[song] = song_path

    # update the progress bar
    progress['value'] = 75
    tkobj2.update()

    return (m3u_paths, librosa_feats, pyaudio_feats, feat_names)
Example No. 5
import ntpath

import matplotlib.pyplot as plt
import numpy as np
import sklearn.decomposition
import sklearn.discriminant_analysis
from scipy.spatial import distance
from pyAudioAnalysis import MidTermFeatures as aF
from pyAudioAnalysis import audioTrainTest as aT

# chordialDiagram is a helper defined elsewhere in the original module.


def visualizeFeaturesFolder(folder, dimReductionMethod, priorKnowledge="none"):
    '''
    This function generates a chordial visualization for the recordings of the
    provided path.
    ARGUMENTS:
        - folder:        path of the folder that contains the WAV files to be
                         processed
        - dimReductionMethod:    method used to reduce the dimensionality of
                                 the initial feature space before computing
                                 the similarity ("pca", otherwise LDA is used).
        - priorKnowledge:    if set to "artist", the artist name (the part of
                             the filename before " --- ") is used as prior
                             class knowledge for the LDA labels.
    '''
    if dimReductionMethod == "pca":
        allMtFeatures, wavFilesList, _ = aF.directory_feature_extraction(
            folder, 30.0, 30.0, 0.050, 0.050, compute_beat=True)
        if allMtFeatures.shape[0] == 0:
            print("Error: No data found! Check input folder")
            return

        namesCategoryToVisualize = [
            ntpath.basename(w).replace('.wav', '').split(" --- ")[0]
            for w in wavFilesList
        ]
        namesToVisualize = [
            ntpath.basename(w).replace('.wav', '') for w in wavFilesList
        ]

        (F, MEAN, STD) = aT.normalize_features([allMtFeatures])
        F = np.concatenate(F)

        # check that the new PCA dimension is at most equal to the number of samples
        K1 = 2
        K2 = 10
        if K1 > F.shape[0]:
            K1 = F.shape[0]
        if K2 > F.shape[0]:
            K2 = F.shape[0]
        pca1 = sklearn.decomposition.PCA(n_components=K1)
        pca1.fit(F)
        pca2 = sklearn.decomposition.PCA(n_components=K2)
        pca2.fit(F)

        finalDims = pca1.transform(F)
        finalDims2 = pca2.transform(F)
    else:
        allMtFeatures, Ys, wavFilesList = aF.directory_feature_extraction_no_avg(
            folder, 20.0, 5.0, 0.040, 0.040
        )  # long-term statistics cannot be applied in this context (LDA needs mid-term features)
        if allMtFeatures.shape[0] == 0:
            print("Error: No data found! Check input folder")
            return

        namesCategoryToVisualize = [
            ntpath.basename(w).replace('.wav', '').split(" --- ")[0]
            for w in wavFilesList
        ]
        namesToVisualize = [
            ntpath.basename(w).replace('.wav', '') for w in wavFilesList
        ]

        ldaLabels = Ys
        if priorKnowledge == "artist":
            uNamesCategoryToVisualize = list(set(namesCategoryToVisualize))
            YsNew = np.zeros(Ys.shape)
            for i, uname in enumerate(
                    uNamesCategoryToVisualize):  # for each unique artist name:
                indicesUCategories = [
                    j for j, x in enumerate(namesCategoryToVisualize)
                    if x == uname
                ]
                for j in indicesUCategories:
                    indices = np.nonzero(Ys == j)
                    YsNew[indices] = i
            ldaLabels = YsNew

        (F, MEAN, STD) = aT.normalize_features([allMtFeatures])
        F = np.array(F[0])

        clf = sklearn.discriminant_analysis.LinearDiscriminantAnalysis(
            n_components=10)
        clf.fit(F, ldaLabels)
        reducedDims = clf.transform(F)

        pca = sklearn.decomposition.PCA(n_components=2)
        pca.fit(reducedDims)
        reducedDims = pca.transform(reducedDims)

        # TODO: CHECK THIS ... SHOULD LDA BE USED IN THE SEMI-SUPERVISED
        # SETTING ONLY?

        # uLabels must have as many labels as the number of wavFilesList
        # elements
        uLabels = np.sort(np.unique(Ys))
        finalDims = np.zeros((uLabels.shape[0], 2))
        for i, u in enumerate(uLabels):
            indices = [j for j, x in enumerate(Ys) if x == u]
            f = reducedDims[indices, :]
            finalDims[i, :] = f.mean(axis=0)
        finalDims2 = reducedDims

    for i in range(finalDims.shape[0]):
        plt.text(finalDims[i, 0],
                 finalDims[i, 1],
                 ntpath.basename(wavFilesList[i].replace('.wav', '')),
                 horizontalalignment='center',
                 verticalalignment='center',
                 fontsize=10)
        plt.plot(finalDims[i, 0], finalDims[i, 1], '*r')
    plt.xlim([1.2 * finalDims[:, 0].min(), 1.2 * finalDims[:, 0].max()])
    plt.ylim([1.2 * finalDims[:, 1].min(), 1.2 * finalDims[:, 1].max()])
    plt.show()

    SM = 1.0 - distance.squareform(distance.pdist(finalDims2, 'cosine'))
    for i in range(SM.shape[0]):
        SM[i, i] = 0.0

    chordialDiagram("visualization", SM, 0.50, namesToVisualize,
                    namesCategoryToVisualize)

    SM = 1.0 - distance.squareform(distance.pdist(F, 'cosine'))
    for i in range(SM.shape[0]):
        SM[i, i] = 0.0
    chordialDiagram("visualizationInitial", SM, 0.50, namesToVisualize,
                    namesCategoryToVisualize)

    # plot super-categories (i.e. artist names)
    uNamesCategoryToVisualize = sorted(list(set(namesCategoryToVisualize)))
    finalDimsGroup = np.zeros(
        (len(uNamesCategoryToVisualize), finalDims2.shape[1]))
    for i, uname in enumerate(uNamesCategoryToVisualize):
        indices = [
            j for j, x in enumerate(namesCategoryToVisualize) if x == uname
        ]
        f = finalDims2[indices, :]
        finalDimsGroup[i, :] = f.mean(axis=0)

    SMgroup = 1.0 - distance.squareform(
        distance.pdist(finalDimsGroup, 'cosine'))
    for i in range(SMgroup.shape[0]):
        SMgroup[i, i] = 0.0
    chordialDiagram("visualizationGroup", SMgroup, 0.50,
                    uNamesCategoryToVisualize, uNamesCategoryToVisualize)
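Hypothetical calls of the function above; "./wavs" is a placeholder folder of WAV files named like "artist --- title.wav":

visualizeFeaturesFolder("./wavs", "pca")
visualizeFeaturesFolder("./wavs", "lda", priorKnowledge="artist")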
Example No. 6
"""! 
@brief Example 13
@details pyAudioAnalysis feature extraction for classes organized in folders
and feature histogram representation (per feature and class).
3-class classification task: animals vs speech vs music segments
@author Theodoros Giannakopoulos {[email protected]}
"""
from pyAudioAnalysis import MidTermFeatures as aF
import os.path
import utilities as ut

if __name__ == '__main__':
    dirs = ["../data/general/animals",
            "../data/general/speech",
            "../data/general/music"]
    class_names = [os.path.basename(d) for d in dirs]
    m_win, m_step, s_win, s_step = 1, 1, 0.1, 0.05
    features = []
    for d in dirs:
        # get feature matrix for each directory (class)
        f, files, fn = aF.directory_feature_extraction(d, m_win, m_step,
                                                       s_win, s_step)
        features.append(f)
    ut.plot_feature_histograms(features, fn, class_names)
Example No. 7
    parse = parse_arguments()
    if parse.audio is None:
        raise ValueError('Input directory is empty')
    if not os.path.isdir(parse.audio):
        raise ValueError('Input path is not a directory')
    if parse.groundtruth is None:
        raise ValueError('Ground truth directory is empty')
    if not os.path.isdir(parse.groundtruth):
        raise ValueError('Ground truth path is not a directory')

    files, labels = read_data(parse.audio, parse.groundtruth)

    one_hot = MultiLabelBinarizer()
    labels = one_hot.fit_transform(labels)
    class_names = [str(c) for c in one_hot.classes_]

    mid_window, mid_step, short_window, short_step = 1, 1, 0.1, 0.1
    f, fn, feature_names = mF.directory_feature_extraction(
        parse.audio, mid_window, mid_step, short_window, short_step)

    X_train, y_train, X_test, y_test = split_data(f, labels, fn)
    print("LinearSVc Classifier")
    classifier = OneVsRestClassifier(LinearSVC(max_iter=10000), n_jobs=-1)
    classifier.fit(X_train, y_train)
    pickle.dump(classifier, open(parse.output + ".sav", 'wb'))

    test_yhat = classifier.predict(X_test)
    print("Testing SVM Classification Report {0} %".format(
        classification_report(y_test, test_yhat, target_names=class_names)))
Example No. 8
import ntpath

import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import sklearn.decomposition
import sklearn.discriminant_analysis
from scipy.spatial import distance
from sklearn.preprocessing import StandardScaler
from pyAudioAnalysis import MidTermFeatures as aF


def visualizeFeaturesFolder(folder, dimReductionMethod, priorKnowledge="none"):
    '''
    This function generates a content visualization for the recordings
    of the provided path.
    ARGUMENTS:
        - folder:        path of the folder that contains the WAV files
                         to be processed
        - dimReductionMethod:    method used to reduce the dimensionality of
                                 the initial feature space before computing
                                 the similarity ("pca", otherwise LDA is used).
        - priorKnowledge:    if set to "artist", the artist name (the part
                             of the filename before " --- ") is used as
                             prior class knowledge for the LDA labels.
    '''
    if dimReductionMethod == "pca":
        all_mt_feat, wav_files, _ = aF.directory_feature_extraction(
            folder, 30.0, 30.0, 0.050, 0.050, compute_beat=True)
        if all_mt_feat.shape[0] == 0:
            print("Error: No data found! Check input folder")
            return

        names_category_toviz = [
            ntpath.basename(w).replace('.wav', '').split(" --- ")[0]
            for w in wav_files
        ]
        names_to_viz = [
            ntpath.basename(w).replace('.wav', '') for w in wav_files
        ]

        scaler = StandardScaler()
        F = scaler.fit_transform(all_mt_feat)

        # check that the new PCA dimension is at most equal
        # to the number of samples
        K1 = 2
        K2 = 10
        if K1 > F.shape[0]:
            K1 = F.shape[0]
        if K2 > F.shape[0]:
            K2 = F.shape[0]
        pca1 = sklearn.decomposition.PCA(n_components=K1)
        pca1.fit(F)
        pca2 = sklearn.decomposition.PCA(n_components=K2)
        pca2.fit(F)

        finalDims = pca1.transform(F)
        finalDims2 = pca2.transform(F)
    else:
        # long-term statistics cannot be applied in this context
        # (LDA needs mid-term features)
        all_mt_feat, Ys, wav_files = aF.\
            directory_feature_extraction_no_avg(folder, 20.0, 5.0, 0.040, 0.040)
        if all_mt_feat.shape[0] == 0:
            print("Error: No data found! Check input folder")
            return

        names_category_toviz = [
            ntpath.basename(w).replace('.wav', '').split(" --- ")[0]
            for w in wav_files
        ]
        names_to_viz = [
            ntpath.basename(w).replace('.wav', '') for w in wav_files
        ]

        ldaLabels = Ys
        if priorKnowledge == "artist":
            unames_category_toviz = list(set(names_category_toviz))
            YsNew = np.zeros(Ys.shape)
            for i, uname in enumerate(unames_category_toviz):
                indicesUCategories = [
                    j for j, x in enumerate(names_category_toviz) if x == uname
                ]
                for j in indicesUCategories:
                    indices = np.nonzero(Ys == j)
                    YsNew[indices] = i
            ldaLabels = YsNew

        scaler = StandardScaler()
        F = scaler.fit_transform(all_mt_feat)

        clf = sklearn.discriminant_analysis.\
            LinearDiscriminantAnalysis(n_components=10)
        clf.fit(F, ldaLabels)
        reducedDims = clf.transform(F)

        pca = sklearn.decomposition.PCA(n_components=2)
        pca.fit(reducedDims)
        reducedDims = pca.transform(reducedDims)

        # TODO: CHECK THIS ... SHOULD LDA BE USED IN THE SEMI-SUPERVISED
        # SETTING ONLY?
        # uLabels must have as many labels as the number of wav_files elements
        uLabels = np.sort(np.unique(Ys))
        finalDims = np.zeros((uLabels.shape[0], 2))
        for i, u in enumerate(uLabels):
            indices = [j for j, x in enumerate(Ys) if x == u]
            f = reducedDims[indices, :]
            finalDims[i, :] = f.mean(axis=0)
        finalDims2 = reducedDims

    for i in range(finalDims.shape[0]):
        plt.text(finalDims[i, 0],
                 finalDims[i, 1],
                 ntpath.basename(wav_files[i].replace('.wav', '')),
                 horizontalalignment='center',
                 verticalalignment='center',
                 fontsize=10)
        plt.plot(finalDims[i, 0], finalDims[i, 1], '*r')
    plt.xlim([1.2 * finalDims[:, 0].min(), 1.2 * finalDims[:, 0].max()])
    plt.ylim([1.2 * finalDims[:, 1].min(), 1.2 * finalDims[:, 1].max()])
    plt.show()

    SM = 1.0 - distance.squareform(distance.pdist(F, 'cosine'))

    # plot super-categories (i.e. artistname)
    unames_category_toviz = sorted(list(set(names_category_toviz)))
    finalDimsGroup = np.zeros(
        (len(unames_category_toviz), finalDims2.shape[1]))
    for i, uname in enumerate(unames_category_toviz):
        indices = [j for j, x in enumerate(names_category_toviz) if x == uname]
        f = finalDims2[indices, :]
        finalDimsGroup[i, :] = f.mean(axis=0)

    SMgroup = 1.0 - distance.squareform(
        distance.pdist(finalDimsGroup, 'cosine'))

    data = SMgroup
    fig = px.imshow(data,
                    labels=dict(x="", y="", color="Category similarity"),
                    x=unames_category_toviz,
                    y=unames_category_toviz)
    fig.update_xaxes(side="top")
    fig.show()
Example No. 9
import os.path
import pickle

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import save_image

from pyAudioAnalysis import MidTermFeatures as mtf

# load cached, normalized features if they exist; otherwise extract and cache
if os.path.isfile('data.pkl'):
    with open('data.pkl', 'rb') as f:
        mid_term_features_2 = pickle.load(f)
        wav_file_list2 = pickle.load(f)
else:
    with open('data.pkl', 'wb') as f:
        mid_term_features, wav_file_list2, mid_feature_names = mtf.directory_feature_extraction(
            'audio', 1, 1, 0.2, 0.2)
        mid_term_features = mid_term_features[:, 0:128]
        m = mid_term_features.mean(axis=0)
        s = np.std(mid_term_features, axis=0)
        mid_term_features_2 = (mid_term_features - m) / s
        pickle.dump(mid_term_features_2, f)
        pickle.dump(wav_file_list2, f)
x = torch.tensor(mid_term_features_2, dtype=torch.float32)

num_epochs = 200
batch_size = 128
learning_rate = 1e-3

dataset = TensorDataset(x)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
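The snippet stops after building the DataLoader; a minimal fully connected autoencoder that could consume it is sketched below (the architecture, sizes and training loop are assumptions, not the original model):

import torch.nn as nn

# Hypothetical model: the original architecture is not shown in the snippet.
class AutoEncoder(nn.Module):
    def __init__(self, n_in=128, n_hidden=16):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(n_in, 64), nn.ReLU(),
                                     nn.Linear(64, n_hidden))
        self.decoder = nn.Sequential(nn.Linear(n_hidden, 64), nn.ReLU(),
                                     nn.Linear(64, n_in))

    def forward(self, x):
        return self.decoder(self.encoder(x))

model = AutoEncoder()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()
for epoch in range(num_epochs):
    for (batch,) in dataloader:  # TensorDataset yields 1-tuples
        optimizer.zero_grad()
        loss = criterion(model(batch), batch)  # reconstruction loss
        loss.backward()
        optimizer.step()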
Example No. 10
'''
pyAudioAnalysis provides two functions for extracting a set of useful
features from a WAV file.
'''
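As a sketch of the per-file API (the docstring does not name the two functions; ShortTermFeatures.feature_extraction and MidTermFeatures.mid_feature_extraction are shown here, with a placeholder WAV path):

from pyAudioAnalysis import audioBasicIO, ShortTermFeatures, MidTermFeatures

# "example.wav" is a placeholder path.
fs, signal = audioBasicIO.read_audio_file("example.wav")
signal = audioBasicIO.stereo_to_mono(signal)

# short-term features: one vector per 50 ms frame
st_feats, st_names = ShortTermFeatures.feature_extraction(
    signal, fs, 0.050 * fs, 0.050 * fs)

# mid-term features: statistics of the short-term features over 1 s windows
mt_feats, _, mt_names = MidTermFeatures.mid_feature_extraction(
    signal, fs, 1.0 * fs, 1.0 * fs, 0.050 * fs, 0.050 * fs)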

from pyAudioAnalysis import MidTermFeatures as mF
import numpy as np
import pandas as pd
import os

basepath_train_cough = 'C:/Users/Guillem/Desktop/HACKATHON 2020/Unlabeled audio/TRAIN/Cough/'
basepath_train_nocough = 'C:/Users/Guillem/Desktop/HACKATHON 2020/Unlabeled audio/TRAIN/No_Cough/'

[mid_term_features_cough, wav_file_list_cough,
 mid_feature_names] = mF.directory_feature_extraction(basepath_train_cough,
                                                      0.1,
                                                      0.1,
                                                      0.01,
                                                      0.01,
                                                      compute_beat=False)
[mid_term_features_nocough, wav_file_list_nocough,
 mid_feature_names] = mF.directory_feature_extraction(basepath_train_nocough,
                                                      0.1,
                                                      0.1,
                                                      0.01,
                                                      0.01,
                                                      compute_beat=False)

label_nocough = np.zeros(np.shape(mid_term_features_nocough)[0])
label_cough = np.ones(np.shape(mid_term_features_cough)[0])

features = np.concatenate(
    (mid_term_features_cough, mid_term_features_nocough), axis=0)
Example No. 11
import numpy as np
import plotly
import plotly.graph_objs as go
import sklearn.svm as sks
from pyAudioAnalysis import MidTermFeatures as aMF


def exp5():
    # AfeExp.data_folder points at the example data folder (defined in the
    # surrounding example class)
    print('pyAudioAnalysis example 5')
    dirs = [
        '{0}music/classical'.format(AfeExp.data_folder),
        '{0}music/metal'.format(AfeExp.data_folder)
    ]
    class_names = ['classical', 'metal']
    m_win, m_step, s_win, s_step = 1, 1, 0.1, 0.05
    features = []
    for d in dirs:  # get feature matrix for each directory (class)
        f, files, fn = aMF.directory_feature_extraction(
            d, m_win, m_step, s_win, s_step)
        features.append(f)
    print(features[0].shape, features[1].shape)
    f1 = np.array([
        features[0][:, fn.index('spectral_centroid_mean')],
        features[0][:, fn.index('energy_entropy_mean')]
    ])
    f2 = np.array([
        features[1][:, fn.index('spectral_centroid_mean')],
        features[1][:, fn.index('energy_entropy_mean')]
    ])

    print('f1 type:{0}; shape:{1}; value:{2};'.format(type(f1), f1.shape, f1))
    print('f2 type:{0}; shape:{1}; value:{2};'.format(type(f2), f2.shape, f2))

    y = np.concatenate((np.zeros(f1.shape[1]), np.ones(f2.shape[1])))
    f = np.concatenate((f1.T, f2.T), axis=0)
    print('y: {0}; {1};'.format(y.shape, y))
    print('X: {0}; {1};'.format(f.shape, f))
    # train the svm classifier
    cl = sks.SVC(kernel='rbf', C=20)
    cl.fit(f, y)

    p1 = go.Scatter(x=f1[0, :],
                    y=f1[1, :],
                    name=class_names[0],
                    marker=dict(size=10, color='rgba(255, 182, 193, .9)'),
                    mode='markers')
    p2 = go.Scatter(x=f2[0, :],
                    y=f2[1, :],
                    name=class_names[1],
                    marker=dict(size=10, color='rgba(100, 100, 220, .9)'),
                    mode='markers')

    # apply the trained model on the points of a grid
    x_ = np.arange(f[:, 0].min(), f[:, 0].max(), 0.002)
    y_ = np.arange(f[:, 1].min(), f[:, 1].max(), 0.002)
    xx, yy = np.meshgrid(x_, y_)
    X_t = np.c_[xx.ravel(), yy.ravel()]
    print('X_t: {0};'.format(X_t.shape))
    Z = cl.predict(X_t).reshape(xx.shape) / 2
    # and visualize the grid on the same plot (decision surfaces)
    cs = go.Heatmap(x=x_,
                    y=y_,
                    z=Z,
                    showscale=False,
                    colorscale=[[0, 'rgba(255, 182, 193, .3)'],
                                [1, 'rgba(100, 100, 220, .3)']])
    mylayout = go.Layout(xaxis=dict(title="spectral_centroid_mean"),
                         yaxis=dict(title="energy_entropy_mean"))
    # plotly.offline.iplot(go.Figure(data=[p1, p2, cs], layout=mylayout))
    plotly.offline.plot({
        'data': [p1, p2, cs],
        'layout': mylayout
    }, auto_open=True)
Example No. 12
import os

import numpy as np
import pandas as pd
import soundfile as sf
from pyAudioAnalysis import MidTermFeatures as mF


def check_duration(directory):
    # remove any recording shorter than one second
    original_len = len(os.listdir(directory))
    for audio in os.listdir(directory):
        wav_file_path = os.path.join(directory, audio)

        with sf.SoundFile(wav_file_path) as f:
            duration = len(f) / f.samplerate

        if duration < 1:
            os.remove(wav_file_path)

    remaining_len = len(os.listdir(directory))
    print('In total, {} audios have been removed due to short duration'.format(
        original_len - remaining_len))

check_duration(pos_path)
check_duration(neg_path)

[mid_term_features_pos, wav_file_list_pos, mid_feature_names] = \
    mF.directory_feature_extraction(pos_path, 0.5, 0.5, 0.05, 0.05,
                                    compute_beat=False)
[mid_term_features_neg, wav_file_list_neg, mid_feature_names] = \
    mF.directory_feature_extraction(neg_path, 0.5, 0.5, 0.05, 0.05,
                                    compute_beat=False)

filenames_pos = []
for file in wav_file_list_pos:
    filenames_pos.append(file.split('/')[-1].split('\\')[-1].split('.')[0])

filenames_neg = []
for file in wav_file_list_neg:
    filenames_neg.append(file.split('/')[-1].split('\\')[-1].split('.')[0])

df_pos = pd.DataFrame(mid_term_features_pos, columns=mid_feature_names)
df_pos['filename'] = filenames_pos
df_pos['label'] = np.ones(len(df_pos))

df_neg = pd.DataFrame(mid_term_features_neg, columns=mid_feature_names)