Example #1
    def __init__(self, setting_file: str):
        """
        Initialization for parameters and classes
        :param setting_file: config file
        """
        # Load parameters from config file
        self.cfg = ConfigReader(setting_file)
        self.sampling_rate = self.cfg.sampling_rate
        self.frame_time = self.cfg.frame_time
        self.overlap_rate = self.cfg.overlap_rate
        self.window_type = self.cfg.window_type
        self.fft_size = self.cfg.fft_size
        self.mod_fft_size = self.cfg.mod_fft_size
        self.window_size = int(self.sampling_rate * 0.001 * self.frame_time)
        self.hop_size = int(self.window_size * self.overlap_rate)

        # Initialize pre-processing
        self.APP = AudioPreProcess(self.frame_time, self.overlap_rate, self.window_type)

        # Feature selection
        self.short_feature_selection_dict = self.cfg.section_reader("short_feature_selection")
        self.long_feature_selection_dict = self.cfg.section_reader("long_feature_selection")
        self.short_feature_list = self.__init_short_feature_select()
        self.long_feature_list = self.__init_long_feature_select()

        # Initialize feature extraction classes
        self.mfcc = MFCC(self.cfg.mfcc_coeff, self.sampling_rate, self.fft_size, self.cfg.mfcc_total_filters)
        self.flux = Flux(self.sampling_rate)
        self.osc = OSC(self.cfg.osc_param, self.sampling_rate, self.fft_size)
        self.msf = MSF(self.cfg.omsc_param, self.sampling_rate, self.fft_size, self.mod_fft_size)
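
For concreteness, a quick sketch of the window/hop arithmetic above; the sampling rate, frame time, and overlap are assumed example values, not taken from any config:

    sampling_rate = 44100   # Hz (assumed example value)
    frame_time = 25         # frame length in ms (assumed example value)
    overlap_rate = 0.5      # hop as a fraction of the window (assumed)

    window_size = int(sampling_rate * 0.001 * frame_time)  # 25 ms -> 1102 samples
    hop_size = int(window_size * overlap_rate)             # -> 551 samples
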
Example #2
    def __init__(self, setting_file: str):
        """
        Initialization for parameters and classes
        :param setting_file: config file
        """
        # Load parameters from config file
        self.cfg = ConfigReader(setting_file)
        self.k = self.cfg.k
        self.num_classes = self.cfg.num_classes
        self.validation_rate = self.cfg.validation_rate

        # Initialize classifier selection
        self.classifier_selection_dict = self.cfg.section_reader("classifier_selection")
        self.selected_classifier = self.__init_classifier_select()
        self.classifier = self.__init_classifier()
Example #3
    def __init__(self, setting_file: str):
        """
        Initialization for parameters and classes
        :param setting_file: config file
        """
        self.cfg = ConfigReader(setting_file)
        self.sampling_rate = self.cfg.sampling_rate
        self.audio_length = self.cfg.audio_length
        self.normalize = self.cfg.normalize
    def __init__(self, audio_dataset_maker: type, audio_feature_extraction: type, classifier: type,
                 dataset_path: str, setting_file: str):
        """
        Initialization for parameters and classes
        :param  audio_dataset_maker:      audio dataset maker class
        :param  audio_feature_extraction: audio feature extraction class
        :param  classifier:               classifier class
        :param  dataset_path:             path to data set
        :param  setting_file:             config file
        """
        self.ADM = audio_dataset_maker(setting_file)
        self.AFE = audio_feature_extraction(setting_file)
        self.CLF = classifier(setting_file)
        self.dataset_path = dataset_path
        self.cfg = ConfigReader(setting_file)
        self.setting_file = setting_file
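
A minimal sketch of how this composing class might be wired up, assuming the surrounding project exposes the classes shown in the other examples; the class name AudioClassificationPipeline, the name AudioDatasetMaker, and both paths are hypothetical:

    pipeline = AudioClassificationPipeline(AudioDatasetMaker,       # hypothetical class names
                                           AudioFeatureExtraction,
                                           Classifier,
                                           "./dataset",             # assumed dataset path
                                           "config.ini")            # assumed config file
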
class Classifier:

    # Initialization
    def __init__(self, setting_file: str):
        """
        Initialization for parameters and classes
        :param setting_file: config file
        """
        # Load parameters from config file
        self.cfg = ConfigReader(setting_file)
        self.k = self.cfg.k
        self.num_classes = self.cfg.num_classes
        self.validation_rate = self.cfg.validation_rate

        # Initialize classifier selection
        self.classifier_selection_dict = self.cfg.section_reader("classifier_selection")
        self.selected_classifier = self.__init_classifier_select()
        self.classifier = self.__init_classifier()

    def __init_classifier_select(self) -> str:
        """
        Load config file and select classifier
        :return selected classifier name
        """
        # Search which classifier is True
        self.selected_classifier = [
            classifier
            for classifier, switch in self.classifier_selection_dict.items()
            if switch == "True"
        ]
        assert len(self.selected_classifier) == 1, \
            "You can select only one classifier at a time"
        return str(self.selected_classifier[0])

    def __init_classifier(self):
        """
        Initialize classifier class
        :return Initialized classifier
        """
        classifier = None
        # Initialize chosen classifier
        if self.selected_classifier == "knn":
            classifier = kNN(self.k)
        elif self.selected_classifier == "logistic_regression":
            classifier = LogisticRegression(self.validation_rate,
                                            self.num_classes)
        elif self.selected_classifier == "mlp":
            classifier = MLP(self.validation_rate, self.num_classes)
        elif self.selected_classifier == "cnn":
            classifier = CNN(self.validation_rate, self.num_classes)
        elif self.selected_classifier == "resnet":
            classifier = ResNet(self.validation_rate, self.num_classes)
        elif self.selected_classifier == "rnn":
            classifier = CustomRNN(self.validation_rate, self.num_classes)
        assert classifier is not None, "No classifier selected. Please select one"
        return classifier
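
    # A minimal sketch (assumed INI layout) of the "classifier_selection"
    # section read in __init__ and interpreted by the two methods above;
    # the switch values arrive as the strings "True"/"False", and exactly
    # one classifier may be switched on:
    #
    #   [classifier_selection]
    #   knn = False
    #   logistic_regression = False
    #   mlp = False
    #   cnn = True
    #   resnet = False
    #   rnn = False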

    def make_dataset_loader(self, train_data_with_label, test_data_with_label,
                            validation_rate):
        """
        Dataset loader for Torch
        :return train loader, validation loader, test loader
        """
        # Make Dataset loader
        train_loader, validation_loader, test_loader = DataProcess.torch_data_loader(
            train_data_with_label, test_data_with_label, validation_rate)
        return train_loader, validation_loader, test_loader

    def load_model(self, input_model_file_name: str):
        """
        Load trained model
        :param  input_model_file_name: model file name
        :return trained model
        """
        assert os.path.exists(input_model_file_name), "Selected model does not exist"
        # Load/Unpickle model
        if input_model_file_name.endswith(".pickle"):
            with open(input_model_file_name, mode='rb') as fp:
                loaded_model = pickle.load(fp)
        elif input_model_file_name.endswith(".prm"):
            loaded_model = torch.load(input_model_file_name)
            loaded_model.eval()
        elif input_model_file_name.endswith(".pkl"):
            with open(input_model_file_name, 'rb') as f:
                loaded_model = cloudpickle.load(f)
        else:
            # Fall back to the module-level load_model (e.g. a Keras model file)
            loaded_model = load_model(input_model_file_name)
        return loaded_model
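
    # Hypothetical usage sketch; the file names are assumptions, not from the source:
    #   clf = Classifier("config.ini")
    #   model = clf.load_model("models/cnn_model.prm")   # .prm    -> torch.load + eval()
    #   sk_model = clf.load_model("models/knn.pickle")   # .pickle -> pickle.load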

    def save_model(self, model, output_directory: str):
        """
        Pickle trained model
        :param  model: trained model
        :param  output_directory: pickled model file name
        """
        # Make output directory if it does not exist
        if not os.path.exists(output_directory):
            os.mkdir(output_directory)
        self.classifier.save_model(model, output_directory)

    def training(self, train_data, train_label, visualize=None):
        """
        Training with train data set
        :param   train_data:  training data
        :param   train_label: training labels
        :param   visualize: True/False to visualize training history
        :return  model: trained model
        """
        print("Train Started")
        if self.selected_classifier == "cnn" or self.selected_classifier == "resnet" or self.selected_classifier == "rnn":
            # Make Torch dataset loader for train
            train_loader, validation_loader = DataProcess.torch_train_data_loader(
                train_data, train_label, self.validation_rate)
            # Train model
            return self.classifier.training(train_loader,
                                            validation_loader,
                                            visualize=visualize)
        else:
            # Train model
            return self.classifier.training(train_data, train_label, visualize)

    def test(self, model, test_data, test_label) -> float:
        """
        Make predictions and output the result from a given model to test data set
        :param  model: trained model
        :param  test_data: test data
        :param  test_label: test labels
        :return Overall test score (accuracy)
        """
        print("Test Started")
        if self.selected_classifier == "cnn" or self.selected_classifier == "resnet" or self.selected_classifier == "rnn":
            # Make Torch dataset loader for test
            test_loader = DataProcess.torch_test_data_loader(
                test_data, test_label)
            # Test model performance
            return self.classifier.test(model, test_loader)
        else:
            # Test model performance
            return self.classifier.test(model, test_data, test_label)

    def predict(self, model, target_data) -> list:
        """
        Make prediction to a given target data and return the prediction result with accuracy for each sample
        :param  model: trained model
        :param  target_data: target data
        :return prediction result with accuracy
        """
        return self.classifier.predict(model, target_data)

    def show_history(self, model_training):
        """
        Plot the training history (accuracy and loss)
        :param model_training: training history object with a .history dict
        """
        # Accuracy over epochs
        plt.plot(model_training.history['acc'])
        plt.plot(model_training.history['val_acc'])
        plt.title('model accuracy')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()

        # Loss
        plt.plot(model_training.history['loss'])
        plt.plot(model_training.history['val_loss'])
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()
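
A minimal end-to-end sketch of how Classifier is meant to be driven once feature arrays exist; the config path, the data variables, and the output directory are assumptions, not from the source:

    clf = Classifier("config.ini")  # assumed config path

    # train_data/test_data: feature arrays; train_label/test_label: label vectors
    model = clf.training(train_data, train_label, visualize=True)
    score = clf.test(model, test_data, test_label)
    print("accuracy:", score)

    clf.save_model(model, "./model_out")  # assumed output directory
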
Example #6
class AudioFeatureExtraction:
    """
    Audio feature extraction to audio files
    Supported features: mfcc, spectral centroid,
    """

    # Initialization
    def __init__(self, setting_file: str):
        """
        Initialization for parameters and classes
        :param setting_file: config file
        """
        # Load parameters from config file
        self.cfg = ConfigReader(setting_file)
        self.sampling_rate = self.cfg.sampling_rate
        self.frame_time = self.cfg.frame_time
        self.overlap_rate = self.cfg.overlap_rate
        self.window_type = self.cfg.window_type
        self.fft_size = self.cfg.fft_size
        self.mod_fft_size = self.cfg.mod_fft_size
        self.window_size = int(self.sampling_rate * 0.001 * self.frame_time)
        self.hop_size = int(self.window_size * self.overlap_rate)

        # Initialize pre-processing
        self.APP = AudioPreProcess(self.frame_time, self.overlap_rate, self.window_type)

        # Feature selection
        self.short_feature_selection_dict = self.cfg.section_reader("short_feature_selection")
        self.long_feature_selection_dict = self.cfg.section_reader("long_feature_selection")
        self.short_feature_list = self.__init_short_feature_select()
        self.long_feature_list = self.__init_long_feature_select()

        # Initialize feature extraction classes
        self.mfcc = MFCC(self.cfg.mfcc_coeff, self.sampling_rate, self.fft_size, self.cfg.mfcc_total_filters)
        self.flux = Flux(self.sampling_rate)
        self.osc = OSC(self.cfg.osc_param, self.sampling_rate, self.fft_size)
        self.msf = MSF(self.cfg.omsc_param, self.sampling_rate, self.fft_size, self.mod_fft_size)

    def __init_short_feature_select(self) -> list:
        """
        Extract setting for short-term feature extraction from config file
        :return list of features to extract
        """
        short_feature_list = []
        for short_feature, switch in self.short_feature_selection_dict.items():
            if switch == "True":
                short_feature_list.append(short_feature)
        return short_feature_list

    def __init_long_feature_select(self) -> list:
        """
        Extract setting for long-term feature extraction from config file
        :return list of features to extract
        """
        long_feature_list = []
        for long_feature, switch in self.long_feature_selection_dict.items():
            if switch == "True":
                long_feature_list.append(long_feature)
        return long_feature_list

    def pre_processing(self, audio_file: str) -> tuple:
        """
        Pre-processing to audio file
        :param  audio_file: name of audio file
        :return tuple of pre-processed audio signal
        """
        return self.APP.apply(audio_file)

    # Feature extraction to one frame
    def extract_short_frame(self, framed_audio: tuple):
        """
        Short-term feature extraction to one frame
        :param  framed_audio: tuple of framed audio data from audio file
        :return power_spectrum: power_spectrum from short-term frame
        :return dictionary of extracted features from framed audio data
                {key: name of feature, value: tuple of features from all frames}
        """
        # Apply FFT
        spectrum = FFT.fft(framed_audio, self.fft_size)
        power_spectrum = FFT.power_fft(framed_audio, self.fft_size)

        # Apply feature extraction to a framed audio and store into a dictionary
        feature_dict = {}
        for short_feature in self.short_feature_list:
            if short_feature == "zcr":
                feature_dict[short_feature] = zerocrossing(framed_audio)
            if short_feature == "mfcc":
                feature_dict[short_feature] = self.mfcc.main(spectrum)
            if short_feature == "rms":
                feature_dict[short_feature] = rms(framed_audio)
            if short_feature == "centroid":
                feature_dict[short_feature] = centroid(power_spectrum, self.fft_size, self.sampling_rate)
            if short_feature == "rolloff":
                feature_dict[short_feature] = rolloff(power_spectrum, self.cfg.rolloff_param)
            if short_feature == "flux":
                feature_dict[short_feature] = self.flux.main(power_spectrum)
            if short_feature == "osc":
                feature_dict[short_feature] = self.osc.main(power_spectrum)
        return power_spectrum, feature_dict

    def extract_long_frame(self, long_frame_audio: list, long_frame_spectrum: list) -> dict:
        """
        Long-term feature extraction to one frame
        :param  long_frame_audio: list of audio data from short-term frame
        :param  long_frame_spectrum: list of spectrum from short-term frame
        :return dictionary of extracted features from framed audio data
                {key: name of feature, value: tuple of features from all frames}
        """
        # Store extracted features into a dictionary (key:name of feature, value: list of extracted features in frames)
        feature_dict = {}

        # Apply feature extraction to a framed audio and store into a dictionary
        for long_feature in self.long_feature_list:
            if long_feature == "low_energy":
                feature_dict[long_feature] = low_energy(long_frame_audio)
            if long_feature == "omsc":
                feature_dict[long_feature] = self.msf.omsc(long_frame_spectrum, self.mod_fft_size)
            if long_feature == "msfm":
                feature_dict[long_feature] = self.msf.msfm(long_frame_spectrum, self.mod_fft_size)
            if long_feature == "mscm":
                feature_dict[long_feature] = self.msf.mscm(long_frame_spectrum, self.mod_fft_size)
        return feature_dict

    def extract_entire_audio(self, input_audio_file: str):
        """
        Read audio file and extract Mel-spectrogram
        :param input_audio_file: Input audio file
        :return: Mel-spectrogram as a 2D numpy array
        """
        # Read audio file and extract mel-spectrogram from entire audio signal
        return mel_spectrogram(input_audio_file, self.fft_size, self.cfg.num_mels, normalize=True)

    def extract_file(self, input_audio_file: str):
        """
        Feature extraction to one audio file
        :param  input_audio_file: name of the audio file
        :return dictionary of extracted features from audio file
                {key: name of feature, value: list of array(number of frames)}
        """
        # Prepare a dictionary to store extracted feature
        feature_dict = {}

        # Pre-processing to audio file
        processed_audio = self.pre_processing(input_audio_file)

        # Extract Mel-spectrogram from the entire audio
        feature_dict['mel_spectrogram'] = self.extract_entire_audio(input_audio_file)

        # Apply feature extraction to all frames and store into dictionary
        short_frame_number = 0
        long_frame_audio = []
        long_frame_power_spectrum = []

        # Store whole short-term features in list
        for short_frame_audio in processed_audio:
            # Extract short-term features
            short_frame_power_spectrum, short_feature_dict = self.extract_short_frame(short_frame_audio)

            # Store short-term features in dictionary
            for short_feature_type in self.short_feature_list:
                feature_dict.setdefault(short_feature_type, []).append(short_feature_dict[short_feature_type])

            # Extract long-term features once the number of short frames reaches the configured long-frame length
            if short_frame_number == self.cfg.long_frame_length:
                long_feature_dict = self.extract_long_frame(long_frame_audio, long_frame_power_spectrum)
                # Store long-term features in dictionary
                for long_feature in self.long_feature_list:
                    feature_dict.setdefault(long_feature, []).append(long_feature_dict[long_feature])

                # Reset cached short-term feature
                short_frame_number = 0
                long_frame_audio = []
                long_frame_power_spectrum = []

            # Update short-term feature cache
            short_frame_number += 1
            long_frame_audio.append(short_frame_audio)
            long_frame_power_spectrum.append(short_frame_power_spectrum)

        return feature_dict
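
    # A sketch of the returned structure (keys follow the feature selection in
    # the config; shapes are illustrative assumptions, not from the source):
    #   feature_dict = {
    #       "mel_spectrogram": <2D array for the whole file>,
    #       "mfcc":   [<coefficient vector per short frame>, ...],
    #       "zcr":    [<scalar per short frame>, ...],
    #       "omsc":   [<vector per long frame>, ...],
    #       ...
    #   }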

    def extract_directory(self, input_directory: str):
        """
        Feature extraction to a folder which contains audio files
        :param  input_directory: folder name which has audio files
        :return dictionary of extracted features from audio file
                {key: name of files, value: list of extracted features}
        """
        # Extract file names in the input directory
        file_names = FileUtil.get_file_names(input_directory)

        # Extract features from audio files in a directory
        file_feature_dict = {}
        start = time.time()

        # Extract each audio file
        for audio_file in tqdm(file_names):
            # Extract features from one audio file
            file_feature_dict[audio_file] = self.extract_file(os.path.join(input_directory, audio_file))

        print("Extracted {0} with {1} \n".format(input_directory, time.time() - start))
        return file_feature_dict

    def extract_dataset(self, dataset_path: str):
        """
        Feature extraction to dataset
        Extract time series feature as 2D pandas dataframe and 3D numpy array, as well as label vector as list
        :param  dataset_path: path to dataset
        :return directory_files_feature_dict: dictionary of extracted features from all audio files in dataset folder
        {key: name of directory, value: list of file names {key: file name, value: list of extracted features}}
        :return label_list: list of numerical label vector
        """
        # Make label
        label_list = self.make_label_from_directory(dataset_path)

        # Get file names and store them into a dictionary
        directory_files_dict = {}
        for directory in FileUtil.get_folder_names(dataset_path, sort=True):
            directory_files_dict[directory] = FileUtil.get_file_names(os.path.join(dataset_path, directory))

        # Extract all features and store them into list
        directory_files_feature_dict = {}
        for directory, audio_files in tqdm(directory_files_dict.items()):
            # Apply feature extraction to one directory
            directory_files_feature_dict[directory] = self.extract_directory(os.path.join(dataset_path, directory))

        return directory_files_feature_dict, label_list

    @staticmethod
    def dict2array(directory_files_feature_dict: dict):
        """
        Convert extracted feature to numpy array
        :param directory_files_feature_dict: dictionary of extracted features from all audio files in dataset folder
        {key: name of directory, value: list of file names {key: file name, value: list of extracted features}}
        :return: expert_feature_2d_array: 2D Numpy array of extracted feature using expert system
        :return: mel_spectrogram_3d_array: 3D Numpy array of extracted mel-spectrogram
        """
        # Initialization
        processed_file = 0
        expert_feature_vector = []

        # Process for each class
        for class_name, file_feature_dict in directory_files_feature_dict.items():
            # Process for each file
            for file_name, feature_value_dict in file_feature_dict.items():
                file_feature_vector = []
                # Process for each feature
                for feature_name, feature in feature_value_dict.items():
                    # Take stats across frames for expert system and append to list
                    if isinstance(feature, list):
                        file_feature_array = np.array(feature)
                        if file_feature_array.ndim == 1:
                            file_feature_vector.append(np.mean(file_feature_array))
                        else:
                            file_feature_vector.extend(np.mean(file_feature_array, axis=0))
                    # Stack mel-spectrograms into a 3D array (2D for the first file)
                    else:
                        if processed_file == 0:
                            mel_spectrogram_3d_array = np.asarray(feature)
                        else:
                            mel_spectrogram_3d_array = np.dstack((mel_spectrogram_3d_array, feature))

                # Append expert system feature vector
                expert_feature_vector.append(file_feature_vector)
                processed_file += 1

        # Transpose 3D array
        mel_spectrogram_3d_array = mel_spectrogram_3d_array.T
        # Convert list to 2D numpy array
        expert_feature_2d_array = np.array(expert_feature_vector)

        return expert_feature_2d_array, mel_spectrogram_3d_array
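
    # Output shapes (a sketch; exact dimensions depend on the config):
    #   expert_feature_2d_array:  (num_files, num_expert_features)
    #   mel_spectrogram_3d_array: per-file 2D mel-spectrograms stacked along
    #                             the last axis, then transposed so files come first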

    @staticmethod
    def make_label_from_directory(dataset_path: str):
        """
        Make a numerical label vector from the directory structure
        :param  dataset_path: path to dataset (one sub-directory per class)
        :return list of numerical labels, one per file
        """
        # Init parameters
        dir_num = 0
        label_list = []

        # Iterate over directories
        for directory in FileUtil.get_folder_names(dataset_path, sort=True):
            # Make label as list
            label_list.extend([dir_num] * len(FileUtil.get_file_names(os.path.join(dataset_path, directory))))
            dir_num += 1
        return label_list

    @staticmethod
    def get_feature_stats(feature_frame_dict: dict, stat_type: str) -> dict:
        """
        # Store statistics from features into dictionary
        :param  feature_frame_dict:dictionary of extracted features from audio file
                {key: name of feature, value: list of array(number of frames)}
        :param  stat_type: type of statistics
        :return feature_stat_dict: features from one audio file with statistics
                {key: name of feature, value: array or single value}
        """
        # For each feature, compute statistical operation
        feature_stat_dict = {}

        for feature_name, values in feature_frame_dict.items():
            print(feature_name)
            if type(values[0]) is not list and values[0].ndim >= 2:
                if stat_type == "mean":
                    feature_frame_dict[feature_name] = np.mean(values[:], axis=0) + 1e-8
                elif stat_type == "std":
                    feature_stat_dict[feature_name] = np.std(values[:], axis=0)
            else:
                if stat_type == "mean":
                    feature_frame_dict[feature_name] -= np.mean(feature_frame_dict[feature_name], axis=0) + 1e-8
                elif stat_type == "std":
                    feature_stat_dict[feature_name] = get_std(feature_frame_dict[feature_name], "r")
        return feature_frame_dict
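
A minimal sketch of the intended dataset flow for this class, assuming the dataset folder holds one sub-directory per class; the config and dataset paths are hypothetical:

    afe = AudioFeatureExtraction("config.ini")  # assumed config path

    # One dict of per-file features per class directory, plus numeric labels
    features, labels = afe.extract_dataset("./dataset")

    # Flatten to arrays for the classifiers: 2D expert features, 3D mel-spectrograms
    expert_2d, mel_3d = AudioFeatureExtraction.dict2array(features)
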
class AudioFeatureExtraction:
    """
    Audio feature extraction to audio files
    Supported features: mfcc, spectral centroid,
    """

    # Initialization
    def __init__(self, setting_file: str):
        """
        Initialization for parameters and classes
        :param setting_file: config file
        """
        # Load parameters from config file
        self.cfg = ConfigReader(setting_file)
        self.sampling_rate = self.cfg.sampling_rate
        self.frame_time = self.cfg.frame_time
        self.overlap_rate = self.cfg.overlap_rate
        self.window_type = self.cfg.window_type
        self.fft_size = self.cfg.fft_size
        self.mod_fft_size = self.cfg.mod_fft_size

        # Initialize pre-processing
        self.APP = AudioPreProcess(self.frame_time, self.overlap_rate, self.window_type)

        # Feature selection
        self.short_feature_selection_dict = self.cfg.section_reader("short_feature_selection")
        self.long_feature_selection_dict = self.cfg.section_reader("long_feature_selection")
        self.short_feature_list = self.__init_short_feature_select()
        self.long_feature_list = self.__init_long_feature_select()

        # Initialize feature extraction classes
        self.mfcc = MFCC(self.cfg.mfcc_coeff, self.sampling_rate, self.fft_size, self.cfg.mfcc_total_filters)
        self.flux = Flux(self.sampling_rate)
        self.osc = OSC(self.cfg.osc_param, self.sampling_rate, self.fft_size)
        self.msf = MSF(self.cfg.omsc_param, self.sampling_rate, self.fft_size, self.mod_fft_size)

    def __init_short_feature_select(self) -> list:
        """
        Extract setting for short-term feature extraction from config file
        :return list of features to extract
        """
        short_feature_list = []
        for short_feature, switch in self.short_feature_selection_dict.items():
            if switch == "True":
                short_feature_list.append(short_feature)
        return short_feature_list

    def __init_long_feature_select(self) -> list:
        """
        Extract setting for long-term feature extraction from config file
        :return list of features to extract
        """
        long_feature_list = []
        for long_feature, switch in self.long_feature_selection_dict.items():
            if switch == "True":
                long_feature_list.append(long_feature)
        return long_feature_list

    # Pre-processing
    def pre_processing(self, audio_file: str) -> tuple:
        """
        Pre-processing to audio file
        :param  audio_file: name of audio file
        :return tuple of pre-processed audio signal
        """
        return self.APP.apply(audio_file)

    # Feature extraction to one frame
    def extract_short_frame(self, framed_audio: tuple):
        """
        Short-term feature extraction to one frame
        :param  framed_audio: tuple of framed audio data from audio file
        :return power_spectrum: power_spectrum from short-term frame
        :return dictionary of extracted features from framed audio data
                {key: name of feature, value: tuple of features from all frames}
        """
        # Apply FFT once per frame; each selected feature below reuses the spectra
        spectrum = FFT.fft(framed_audio, self.fft_size)
        power_spectrum = FFT.power_fft(framed_audio, self.fft_size)

        # Apply feature extraction to a framed audio and store into a dictionary
        # (key: name of feature, value: extracted feature for this frame)
        feature_dict = {}
        for short_feature in self.short_feature_list:

            if short_feature == "zcr":
                feature_dict[short_feature] = zerocrossing(framed_audio)
            if short_feature == "mfcc":
                feature_dict[short_feature] = self.mfcc.main(spectrum)
            if short_feature == "rms":
                feature_dict[short_feature] = rms(framed_audio)
            if short_feature == "centroid":
                feature_dict[short_feature] = centroid(power_spectrum, self.fft_size, self.sampling_rate)
            if short_feature == "rolloff":
                feature_dict[short_feature] = rolloff(power_spectrum, self.cfg.rolloff_param)
            if short_feature == "flux":
                feature_dict[short_feature] = self.flux.main(power_spectrum)
            if short_feature == "osc":
                feature_dict[short_feature] = self.osc.main(power_spectrum)
            if short_feature == "mel_spectrogram":
                # Mel-spectrum needs to be stored to be converted later
                feature_dict[short_feature] = self.mfcc.mel_spectrum(spectrum)
        return power_spectrum, feature_dict

    # Feature extraction to one long-term frame
    def extract_long_frame(self, long_frame_audio: list, long_frame_spectrum: list) -> dict:
        """
        Long-term feature extraction to one frame
        :param  long_frame_audio: list of audio data from short-term frame
        :param  long_frame_spectrum: list of spectrum from short-term frame
        :return dictionary of extracted features from framed audio data
                {key: name of feature, value: tuple of features from all frames}
        """
        # Store extracted features into a dictionary (key:name of feature, value: list of extracted features in frames)
        feature_dict = {}

        # Apply feature extraction to a framed audio and store into a dictionary
        for long_feature in self.long_feature_list:
            if long_feature == "low_energy":
                feature_dict[long_feature] = low_energy(long_frame_audio)
            if long_feature == "omsc":
                feature_dict[long_feature] = self.msf.omsc(long_frame_spectrum, self.mod_fft_size)
            if long_feature == "msfm":
                feature_dict[long_feature] = self.msf.msfm(long_frame_spectrum, self.mod_fft_size)
            if long_feature == "mscm":
                feature_dict[long_feature] = self.msf.mscm(long_frame_spectrum, self.mod_fft_size)
        return feature_dict

    def extract_file(self, input_audio_file: str):
        """
        Feature extraction to one audio file
        :param  input_audio_file: name of the audio file
        :return dictionary of extracted features from audio file
                {key: name of feature, value: list of array(number of frames)}
        :return file_short_feature_list: short-term features across whole audio file stored into list
        """
        # Pre-processing to audio file
        processed_audio = self.pre_processing(input_audio_file)

        # Apply feature extraction to all frames and store into dictionary
        feature_dict = {}
        frame_number = 0
        long_frame_audio = []
        long_frame_power_spectrum = []

        # Store whole short-term features in list
        file_short_feature_list = []
        for short_frame_audio in processed_audio:
            # Extract short-term features
            short_frame_power_spectrum, short_feature_dict = self.extract_short_frame(short_frame_audio)
            # Create a feature vector and append
            file_short_feature_list.append(DataProcess.flatten_list(list(short_feature_dict.values())))

            # Store short-term features in dictionary
            for short_feature_type in self.short_feature_list:
                feature_dict.setdefault(short_feature_type, []).append(short_feature_dict[short_feature_type])

            # Extract long-term features once the long-frame length is reached
            if frame_number == self.cfg.long_frame_length:
                long_feature_dict = self.extract_long_frame(long_frame_audio, long_frame_power_spectrum)
                # Store long-term features in dictionary
                for long_feature in self.long_feature_list:
                    feature_dict.setdefault(long_feature, []).append(long_feature_dict[long_feature])

                # Reset for long-term feature
                frame_number = 0
                long_frame_audio = []
                long_frame_power_spectrum = []

            # Update short frame stack
            frame_number += 1
            long_frame_audio.append(short_frame_audio)
            long_frame_power_spectrum.append(short_frame_power_spectrum)

        return feature_dict, file_short_feature_list

    def extract_directory(self, input_directory: str, stats_type: str):
        """
        Feature extraction to a folder which contains audio files
        :param  input_directory: folder name which has audio files
        :param  stats_type: type of statistics
        :return dictionary of extracted features from audio file
                {key: name of feature, value: list of array(number of frames)}
        :return directory_short_feature_list: short-term features extracted from all audio files from one directory
        """
        # Extract file names in the input directory
        file_names = FileUtil.get_file_names(input_directory)

        # Extract features from audio files in a directory
        file_feature_stat_dict = {}
        start = time.time()

        # Extract each audio file
        for count, audio_file in enumerate(file_names):
            # Extract features from one audio file
            frame_extracted_feature, file_short_feature_list = self.extract_file(os.path.join(input_directory, audio_file))

            # Append short-term feature to 3D array
            if count == 0:
                directory_3d_feature = np.array(file_short_feature_list).T
                print("Data structure is {}".format(directory_3d_feature.shape))
            else:
                directory_3d_feature = np.dstack((directory_3d_feature, np.array(file_short_feature_list).T))
                print("Data structure is {}".format(directory_3d_feature.shape))

            # Take stats across frames
            file_feature_stat_dict[audio_file] = self.get_feature_stats(frame_extracted_feature, stats_type)

        end = time.time()
        print("Extracted {0} in {1:.2f} s\n".format(input_directory, end - start))

        return file_feature_stat_dict, directory_3d_feature

    def extract_dataset(self, dataset_path: str, stats_type: str):
        """
        Feature extraction to dataset
        Extract time series feature as 2D pandas dataframe and 3D numpy array, as well as label vector as list
        :param  dataset_path: path to dataset
        :param  stats_type: type of statistics for 2D feature
        :return all_2d_dataframe: 2D feature pandas dataframe across all frames
        :return all_3d_array: 3D feature numpy array across all frames
        :return label_list: list of numerical label vector
        """
        # Get folder names under data set path
        directory_names = FileUtil.get_folder_names(dataset_path, sort=True)

        # Get file names and store them into a dictionary
        directory_files_dict = {}
        for directory in directory_names:
            directory_files_dict[directory] = FileUtil.get_file_names(os.path.join(dataset_path, directory))

        # Extract all features and store them into list
        all_2d_dataframe = pd.DataFrame()
        dir_num = 0
        label_list = []
        for directory, audio_files in directory_files_dict.items():
            # Apply feature extraction to a directory
            file_feature_stat_dict, class_3d_feature = self.extract_directory(os.path.join(dataset_path, directory), stats_type)

            # Convert dictionary to data frame
            class_2d_dataframe = DataProcess.dict2dataframe(file_feature_stat_dict, segment_feature=True)

            # Add label to 2D feature data frame
            class_2d_dataframe_with_label = DataProcess.add_label(class_2d_dataframe, directory)

            # Combine 2D feature data frame
            all_2d_dataframe = pd.concat([all_2d_dataframe, class_2d_dataframe_with_label])

            # Append 3D arrays
            if dir_num == 0:
                all_3d_array = class_3d_feature
            else:
                all_3d_array = np.dstack((all_3d_array, class_3d_feature))

            # Make label as list
            label_list.extend([dir_num] * len(audio_files))
            dir_num += 1

        # Transpose 3D array
        all_3d_array = all_3d_array.T

        return all_2d_dataframe, all_3d_array, label_list

    @staticmethod
    def get_feature_stats(feature_frame_dict: dict, stat_type: str) -> dict:
        """
        # Store statistics from features into dictionary
        :param  feature_frame_dict:dictionary of extracted features from audio file
                {key: name of feature, value: list of array(number of frames)}
        :param  stat_type: type of statistics
        :return feature_stat_dict: features from one audio file with statistics
                {key: name of feature, value: array or single value}
        """
        # For each feature, compute statistical operation
        feature_stat_dict = {}

        for feature, frame in feature_frame_dict.items():
            if stat_type == "mean":
                feature_stat_dict[feature] = get_mean(feature_frame_dict[feature], "r")
            elif stat_type == "std":
                feature_stat_dict[feature] = get_std(feature_frame_dict[feature], "r")
        return feature_stat_dict
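
As with the earlier variant, a brief sketch of how this version's dataset entry point might be called; the paths and the choice of statistic are assumptions, not from the source:

    afe = AudioFeatureExtraction("config.ini")  # assumed config path

    # 2D stats dataframe, 3D short-term feature array, and numeric labels
    all_2d, all_3d, labels = afe.extract_dataset("./dataset", stats_type="mean")
    print(all_2d.shape, all_3d.shape, len(labels))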