示例#1
0
def extract(x, sr=16000):
    """Build the padded short-term feature sequence for one audio signal.

    Args:
        x: Audio samples handed unchanged to the pyAudioAnalysis extractors.
        sr: Sampling rate of ``x``; defaults to 16000.

    Returns:
        The output of ``sequence.pad_sequences`` for a one-element batch
        holding this signal's normalized feature matrix, post-padded with
        ``globalvars.masking_value`` up to ``globalvars.max_len``.
    """
    # Window and hop are configured relative to the sampling rate.
    window = globalvars.frame_size * sr
    hop = globalvars.step * sr

    # 34D short-term feature
    short_term = audioFeatureExtraction.stFeatureExtraction(x, sr, window, hop)

    # pyAudioAnalysis builds that support Python 3 return a tuple; the
    # feature matrix is its first element.
    if type(short_term) is tuple:
        short_term = short_term[0]

    # Harmonic ratio and pitch, 2D -- transposed so it can be stacked under
    # the short-term features along axis 0.
    harmonic_pitch = audioFeatureExtraction.stFeatureSpeed(x, sr, window, hop)
    stacked = np.append(short_term, harmonic_pitch.transpose(), axis=0)

    # Z-normalize along axis 0, then flip the axes.
    # NOTE(review): presumably rows are features and columns are frames
    # here, so the result is (frames, features) -- confirm.
    normalized = stats.zscore(stacked, axis=0).transpose()

    return sequence.pad_sequences([normalized],
                                  maxlen=globalvars.max_len,
                                  dtype='float32',
                                  padding='post',
                                  value=globalvars.masking_value)
    def extract_dataset(self, data, nb_samples, dataset, save=True):
        """Extract padded feature sequences for every signal in ``data``.

        Args:
            data: Iterable of ``(x, Fs)`` pairs (audio samples, sampling rate).
            nb_samples: Total sample count; only used in the progress message.
            dataset: Path prefix; features are pickled to
                ``dataset + '_features.p'`` when ``save`` is true.
            save: Whether to pickle the padded features to disk.

        Returns:
            The output of ``sequence.pad_sequences`` over all per-signal
            feature matrices, post-padded with ``globalvars.masking_value``
            up to ``globalvars.max_len``.
        """
        f_global = []

        for i, (x, Fs) in enumerate(data, start=1):
            window = globalvars.frame_size * Fs
            hop = globalvars.step * Fs

            # 34D short-term feature
            f = audioFeatureExtraction.stFeatureExtraction(x, Fs, window, hop)

            # pyAudioAnalysis builds that support Python 3 return a tuple
            # whose first element is the feature matrix.
            if isinstance(f, tuple):
                f = f[0]

            # Harmonic ratio and pitch, 2D
            hr_pitch = audioFeatureExtraction.stFeatureSpeed(x, Fs, window, hop)
            f = np.append(f, hr_pitch.transpose(), axis=0)

            # Z-normalized
            f = stats.zscore(f, axis=0)

            f_global.append(f.transpose())

            # Move the cursor up one line so the progress message
            # overwrites the previous one.
            sys.stdout.write("\033[F")
            print("Extracting features " + str(i) + '/' + str(nb_samples) + " from data set...")

        f_global = sequence.pad_sequences(f_global,
                                          maxlen=globalvars.max_len,
                                          dtype='float32',
                                          padding='post',
                                          value=globalvars.masking_value)

        if save:
            print("Saving features to file...")
            # Context manager guarantees the file is flushed and closed.
            with open(dataset + '_features.p', 'wb') as features_file:
                cPickle.dump(f_global, features_file)

        return f_global
示例#3
0
def extract_features(dataset):
    """Compute one mean feature vector per signal in ``dataset``.

    Args:
        dataset: Object exposing ``data`` (iterable of ``(x, Fs)`` pairs),
            ``targets`` (its length is shown in the progress message),
            ``frame_size`` and ``step``.

    Returns:
        A list with one entry per signal: the mean over axis 0 of the
        transposed, z-normalized feature matrix.
    """
    total = len(dataset.targets)
    frame_size = dataset.frame_size
    step = dataset.step
    pooled = []

    for count, (signal, rate) in enumerate(dataset.data, start=1):
        window = frame_size * rate
        hop = step * rate

        # 34D short-term feature
        short_term = audioFeatureExtraction.stFeatureExtraction(
            signal, rate, window, hop)

        # pyAudioAnalysis builds that support Python 3 return a tuple; the
        # feature matrix is its first element.
        if type(short_term) is tuple:
            short_term = short_term[0]

        # Harmonic ratio and pitch, 2D
        harmonic_pitch = audioFeatureExtraction.stFeatureSpeed(
            signal, rate, window, hop)
        stacked = np.append(short_term, harmonic_pitch.transpose(), axis=0)

        # Z-normalize, flip the axes, then average along axis 0.
        normalized = stats.zscore(stacked, axis=0)
        pooled.append(np.mean(normalized.transpose(), axis=0))

        # Move the cursor up one line so the progress message overwrites
        # the previous one.
        sys.stdout.write("\033[F")
        progress = "\t Extracting features " + str(count) + '/' + str(total) + " from data set..."
        print(progress)

    return pooled
示例#4
0
    def _get_batches_of_transformed_samples(self, index_array):
        """Build one batch of augmented, padded feature sequences.

        Args:
            index_array: Indices into ``self.x`` (and ``self.y`` when labels
                are present) selecting the samples of this batch.

        Returns:
            ``[batch_u, batch_x]`` when ``self.y`` is ``None``; otherwise the
            tuple ``([batch_u, batch_x], batch_y)``. ``batch_u`` is a float32
            array of shape ``(len(index_array), globalvars.nb_attention_param)``
            filled with ``globalvars.attention_init_value``; ``batch_x`` is
            the output of ``sequence.pad_sequences`` over the per-sample
            feature matrices.
        """
        generator = self.audio_data_generator
        # Window and hop depend only on the sampling rate, so compute once.
        window = globalvars.frame_size * self.sr
        hop = globalvars.step * self.sr

        batch_x = []
        for j in index_array:
            x = self.x[j]

            # Augmentation
            if generator.white_noise_:
                x = generator.white_noise(x)
            if generator.shift_:
                x = generator.shift(x)
            if generator.stretch_:
                x = generator.stretch(x)

            # 34D short-term feature
            f = audioFeatureExtraction.stFeatureExtraction(
                x, self.sr, window, hop)

            # pyAudioAnalysis builds that support Python 3 return a tuple
            # whose first element is the feature matrix.
            if isinstance(f, tuple):
                f = f[0]

            # Harmonic ratio and pitch, 2D
            hr_pitch = audioFeatureExtraction.stFeatureSpeed(
                x, self.sr, window, hop)
            x = np.append(f, hr_pitch.transpose(), axis=0)

            # Z-normalized
            x = stats.zscore(x, axis=0)

            batch_x.append(x.transpose())

        batch_x = sequence.pad_sequences(batch_x,
                                         maxlen=globalvars.max_len,
                                         dtype='float32',
                                         padding='post',
                                         value=globalvars.masking_value)

        # One constant-initialized row per sample in the batch.
        batch_u = np.full(
            (len(index_array), globalvars.nb_attention_param),
            globalvars.attention_init_value,
            dtype=np.float32)

        if self.y is None:
            return [batch_u, batch_x]
        batch_y = self.y[index_array]

        return [batch_u, batch_x], batch_y
示例#5
0
def extract_features(dataset):
    """Extract un-normalized frame-level features for every signal.

    Args:
        dataset: Object exposing ``data`` (iterable of ``(x, Fs)`` pairs),
            ``targets`` (its length is shown in the progress message),
            ``frame_size`` and ``step``.

    Returns:
        A ``(f_global, f_global_concatenate)`` tuple: ``f_global`` is a list
        with one transposed feature matrix per signal, and
        ``f_global_concatenate`` is a single ``pandas.DataFrame`` with all of
        those matrices stacked row-wise under a fresh index (an empty
        DataFrame when ``dataset.data`` yields nothing).
    """
    data = dataset.data
    nb_samples = len(dataset.targets)
    frame_size = dataset.frame_size
    step = dataset.step
    f_global = []

    for i, (x, Fs) in enumerate(data, start=1):
        window = frame_size * Fs
        hop = step * Fs

        # 34D short-term feature
        f = audioFeatureExtraction.stFeatureExtraction(x, Fs, window, hop)

        # pyAudioAnalysis builds that support Python 3 return a tuple whose
        # first element is the feature matrix.
        if isinstance(f, tuple):
            f = f[0]

        # Harmonic ratio and pitch, 2D -- in this variant they are placed
        # BEFORE the short-term features along axis 0.
        hr_pitch = audioFeatureExtraction.stFeatureSpeed(x, Fs, window, hop)
        f = np.append(hr_pitch.transpose(), f, axis=0)

        f_global.append(f.transpose())

        # Move the cursor up one line so the progress message overwrites
        # the previous one.
        sys.stdout.write("\033[F")
        print("Extracting features " + str(i) + '/' + str(nb_samples) +
              " from data set...")

    # Concatenate once after the loop: growing a DataFrame with pd.concat on
    # every iteration re-copies all previously accumulated rows each time.
    if f_global:
        f_global_concatenate = pd.concat(
            [pd.DataFrame(f) for f in f_global], axis=0, ignore_index=True)
    else:
        f_global_concatenate = pd.DataFrame()

    return f_global, f_global_concatenate
    def extract(self, x, Fs=16000):
        """Build the padded short-term feature sequence for one audio signal.

        Args:
            x: Audio samples handed unchanged to the pyAudioAnalysis
                extractors.
            Fs: Sampling rate of ``x``; defaults to 16000.

        Returns:
            The output of ``sequence.pad_sequences`` for a one-element batch
            holding this signal's normalized feature matrix, post-padded with
            ``globalvars.masking_value`` up to ``globalvars.max_len``.
        """
        window = globalvars.frame_size * Fs
        hop = globalvars.step * Fs

        # 34D short-term feature
        f = audioFeatureExtraction.stFeatureExtraction(x, Fs, window, hop)

        # pyAudioAnalysis builds that support Python 3 return a tuple whose
        # first element is the feature matrix.
        if isinstance(f, tuple):
            f = f[0]

        # Harmonic ratio and pitch, 2D
        hr_pitch = audioFeatureExtraction.stFeatureSpeed(x, Fs, window, hop)
        f = np.append(f, hr_pitch.transpose(), axis=0)

        # Z-normalized
        f = stats.zscore(f, axis=0)

        f = f.transpose()

        # pad_sequences is given a batch, so wrap the single matrix in a list.
        return sequence.pad_sequences([f],
                                      maxlen=globalvars.max_len,
                                      dtype='float32',
                                      padding='post',
                                      value=globalvars.masking_value)