Пример #1
0
def test_spectrogram_256():
    """Check shape and value range of a uint8 spectrogram of a sine tone.

    A tone is loaded with ``Audio.load_tone``, turned into a spectrogram with
    ``n_fft=256`` and converted with ``to_uint8``. The test then verifies the
    number of rows, the number of columns and that every value is in 0-255.
    """
    sample_rate = 250000
    n_fft = 256
    samples = 1 * sample_rate

    tone = Audio.load_tone(20, samples, 'sine', sample_rate)
    spectrogram = to_uint8(tone.get_spectrogram_array(n_fft=n_fft))

    height, width = spectrogram.shape[0], spectrogram.shape[1]
    assert height == (n_fft / 2) + 1, 'spectrogram height'
    assert width == int(samples * 2 / n_fft) + 1, 'spectrogram width'

    # One boolean per row: True when every value of that row is within 0-255.
    rows_in_range = [
        all(0 <= value <= 255 for value in row) for row in spectrogram
    ]
    assert np.array(
        rows_in_range
    ).all(), f'range check {spectrogram} failed. Expected all values 0-255'
Пример #2
0
def test_mel_spectrogram_256():
    """Check shape and value range of a uint8 mel spectrogram of a sine tone.

    A tone is loaded with ``Audio.load_tone``, turned into a mel spectrogram
    (``n_fft=256``, ``hop_length=128``, ``n_mels=60``) and converted with
    ``to_uint8``. The test verifies that there are ``n_mels`` rows, the
    expected number of columns, and that every value is in 0-255.
    """
    n_fft, hop_length, n_mels = 256, 128, 60
    sample_rate = 384000
    samples = 1 * sample_rate

    tone = Audio.load_tone(20, samples, 'sine', sample_rate)
    mel = tone.get_mel_spectrogram_array(n_fft=n_fft,
                                         n_mels=n_mels,
                                         hop_length=hop_length)
    spectrogram = to_uint8(mel)
    print(spectrogram.shape)

    height, width = spectrogram.shape[0], spectrogram.shape[1]
    assert height == n_mels, 'spectrogram height'
    assert width == int(samples * 2 / n_fft) + 1, 'spectrogram width'

    # One boolean per row: True when every value of that row is within 0-255.
    rows_in_range = [
        all(0 <= value <= 255 for value in row) for row in spectrogram
    ]
    assert np.array(
        rows_in_range
    ).all(), f'range check {spectrogram} failed. Expected all values 0-255'
Пример #3
0
    def generate_jonnor_mnist(self,
                              preprocessed,
                              train_folds=(1, 2, 3, 4),
                              test_folds=(5, )):
        """Split preprocessed records into MNIST-style train and test lists.

        Each record of ``preprocessed`` is unpacked as
        ``(fold, filename, target, category, take, spectrogram, settings)``;
        only ``fold``, ``target`` and ``spectrogram`` are used here.

        Args:
            preprocessed: Iterable of 7-item records as described above.
            train_folds: Folds whose records go into the training lists.
            test_folds: Folds whose records go into the test lists. A fold
                present in both collections is treated as a training fold.

        Returns:
            ``(x_train, y_train, s_train, x_test, y_test, s_test)`` where the
            ``x_*`` lists hold flattened ``to_uint8`` spectrograms, the
            ``y_*`` lists hold targets and the ``s_*`` lists hold the shape
            of each spectrogram before conversion and flattening.
        """
        # Defaults are tuples rather than lists so that no mutable object is
        # shared between calls; only membership tests are performed on them.
        x_train, y_train, s_train = [], [], []
        x_test, y_test, s_test = [], [], []

        for (fold, _filename, target, _category, _take, spectrogram,
             _settings) in preprocessed:
            if fold in train_folds:
                xs, ys, shapes = x_train, y_train, s_train
            elif fold in test_folds:
                xs, ys, shapes = x_test, y_test, s_test
            else:
                # Record belongs to neither split: skip it before paying for
                # the uint8 conversion.
                continue

            xs.append(to_uint8(spectrogram).flatten())
            ys.append(target)
            shapes.append(spectrogram.shape)

        return x_train, y_train, s_train, x_test, y_test, s_test
Пример #4
0
    def to_mnist(self,
                 train_folds=(),
                 test_folds=(),
                 cache_path: str = None,
                 n_fft: int = 1024,
                 hop_length: int = None,
                 flatten=True):
        """Build MNIST-style train/test lists of uint8 spectrograms.

        Modelled on ``tf.keras.datasets.mnist.load_data()``: the ``x_*``
        lists hold image-like data (spectrograms passed through ``to_uint8``)
        and the ``y_*`` lists hold the label of each item.

        Args:
            train_folds: Folds whose files go into the training lists. Fold
                values of the records are converted with ``int`` before the
                membership test, so integers are expected here.
            test_folds: Folds whose files go into the test lists.
            cache_path: Directory of the pickle cache. ``None`` (the
                default) disables caching entirely.
            n_fft: FFT size passed to ``get_spectrogram_array``.
            hop_length: Hop length passed to ``get_spectrogram_array``;
                ``int(n_fft / 4)`` is used when ``None``.
            flatten: When true each spectrogram is stored flattened,
                otherwise it is stored with its own shape.

        Returns:
            ``(x_train, y_train, s_train, x_test, y_test, s_test)`` where the
            ``s_*`` lists hold the shape of each spectrogram after the
            ``to_uint8`` conversion. When a cache file for the given
            arguments exists, its unpickled content is returned instead.
        """
        # Setup caching. The path is only built when caching is requested:
        # os.path.join() raises TypeError when handed None.
        cache_filename = None
        if cache_path is not None:
            # The key uses the arguments exactly as passed (an omitted
            # hop_length is spelled "None") so existing cache files stay valid.
            # NOTE(review): fold numbers are concatenated without a
            # separator, so e.g. [1, 23] and [12, 3] map to the same file.
            cache_training_fold = "".join(str(x) for x in train_folds)
            cache_testing_fold = "".join(str(x) for x in test_folds)
            cache_file = f'mnist_{str(n_fft)}_{str(hop_length)}_{str(int(flatten))}_{cache_training_fold}_{cache_testing_fold}.pkl'
            cache_filename = os.path.join(cache_path, cache_file)

            print(f'Caching: {cache_filename}')
            if os.path.exists(cache_filename):
                # NOTE: pickle can execute arbitrary code while loading; only
                # point cache_path at directories you trust.
                with open(cache_filename, 'rb') as f:
                    return pickle.load(f)

        if hop_length is None:
            hop_length = int(n_fft / 4)

        all_records = list(
            zip(self.get_folds(), self.get_filenames(), self.get_targets()))

        x_train, y_train, s_train = [], [], []
        x_test, y_test, s_test = [], [], []

        # Same processing for both splits; only the destination lists differ.
        splits = [(train_folds, x_train, y_train, s_train),
                  (test_folds, x_test, y_test, s_test)]

        for folds, x, y, s in tqdm(splits, desc='Fold'):
            selected = [r for r in all_records if int(r[0]) in folds]
            for _fold, filename, target in tqdm(selected, desc='File'):
                audio = self.get_audio(filename)

                # Files for which no audio is returned are skipped.
                if audio is None:
                    continue

                # Get a Numpy array of the spectrogram
                spectrogram = audio.get_spectrogram_array(
                    n_fft=n_fft, hop_length=hop_length, mode='dB')

                # Convert with to_uint8 (presumably rescales into the 0-255
                # range used by MNIST images - confirm against to_uint8).
                spectrogram = to_uint8(spectrogram)

                # Flatten spectrogram if required and store
                x.append(spectrogram.flatten() if flatten else spectrogram)
                y.append(target)
                s.append(spectrogram.shape)

        result = (x_train, y_train, s_train, x_test, y_test, s_test)

        if cache_filename is not None:
            with open(cache_filename, 'wb') as f:
                pickle.dump(result, f)

        return result