def train_basic_audio_classifier(num_classes, model_savepath, training_data_folder, training_epoch, batch_size=128, amount_limit=5000, forceLoad=True):
    """Train a basic 1-D convolutional audio classifier and save it.

    Fix: the original docstring was written at column 0 inside the
    function, which is a SyntaxError; commented-out dead code removed.

    Parameters
    ----------
    num_classes : int
        Number of target classes (width of the softmax output layer).
    model_savepath : str
        Path the trained Keras model is written to.
    training_data_folder : str
        Dataset folder name forwarded to ``load_audio``.
    training_epoch : int
        Number of training epochs.
    batch_size : int, optional
        Mini-batch size for ``model.fit``.
    amount_limit : int, optional
        Sample cap forwarded to ``load_audio``.
    forceLoad : bool, optional
        Forwarded to ``load_audio`` (presumably forces re-reading raw
        audio instead of a cached copy -- confirm in audio_loader).
    """
    epochs = training_epoch
    kernel_size = 5
    framerate = 16384  # sample rate requested from load_audio

    (x_train, y_train), (x_test, y_test) = load_audio(
        training_data_folder, num_classes, forceLoad=forceLoad,
        framerate=framerate, amount_limit=amount_limit)

    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)

    # One channel per time step; count_convolutions decides how many
    # stride-2 conv layers fit before the sequence length collapses.
    input_shape = (x_train.shape[1], 1)
    convolution_layers = count_convolutions(input_shape, kernel_size)

    model = keras.models.Sequential()
    model.add(Conv1D(16, kernel_size=kernel_size, activation="selu",
                     strides=2, input_shape=input_shape, padding="same"))
    for _ in range(convolution_layers):
        model.add(Conv1D(32, kernel_size=kernel_size, activation="selu",
                         strides=2, padding="same"))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(128, activation="selu"))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(0.0005),
                  metrics=['accuracy'])
    model.summary()

    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test))

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    model.save(model_savepath)
    print('model saved to ', model_savepath)
# NOTE(review): `sys` is used before any visible `import sys` -- presumably
# imported earlier in the full file; confirm.
sys.path.insert(0, 'tools')

from audio_tools import count_convolutions
from audio_loader import load_audio
import keras
from keras.layers import Dense, Dropout, Flatten, LeakyReLU
from keras.layers import Conv1D

import os

# Pin training to the first GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

num_classes = 10

# Load the speech_commands dataset split into train/test sets.
(x_train, y_train), (x_test, y_test) = load_audio("speech_commands",
                                                  num_classes)

batch_size = 30
epochs = 50
kernel_size = 5

# (x_train.shape[1], 1): one input channel per audio sample.
input_shape = (x_train.shape[1], 1)
# NOTE(review): convolution_layers is unused in the visible span --
# presumably consumed by lines beyond this fragment.
convolution_layers = count_convolutions(input_shape, kernel_size)

# Fully-connected classifier over the flattened waveform; the model is
# presumably compiled/trained in lines past the end of this fragment.
model = keras.models.Sequential()
model.add(Flatten(input_shape=input_shape))
model.add(Dense(512))
model.add(LeakyReLU(alpha=0.2))
model.add(Dense(256))
model.add(LeakyReLU(alpha=0.2))
# ---- Example #3 (scrape-artifact separator; original read "Пример #3" / "0") ----
from audio_loader import load_audio
from playsound import play_and_save_sound


def check_samples(samples):
    """Report the global minimum and maximum value across *samples*.

    Fixes over the original: the minimum was seeded with 10000 (wrong
    answer when every value exceeds 10000) and the maximum with 0
    (wrong for all-negative data); the locals shadowed the builtins
    ``min``/``max``; "max wads" typo; noisy per-update debug prints
    removed in favour of the final summary.

    Parameters
    ----------
    samples : iterable of iterables of numbers
        E.g. a 2-D array of audio clips.

    Returns
    -------
    tuple or None
        ``(lowest, highest)`` over all values, or ``None`` when
        *samples* is empty (callers that ignored the old ``None``
        return are unaffected).
    """
    flat = [value for clip in samples for value in clip]
    if not flat:
        print("no samples to check")
        return None
    lowest = min(flat)
    highest = max(flat)
    print("max was " + str(highest))
    print("min was :" + str(lowest))
    return lowest, highest


# Load the speech_commands dataset (10 classes); labels are unused here.
(x_train, y_train), (x_test, y_test) = load_audio("speech_commands", 10)

#play_and_save_sound(x_train, "test")
# Print the value range of the training clips (sanity check on scaling).
check_samples(x_train)
# ---- Example #4 (scrape-artifact separator; original read "Пример #4" / "0") ----
                                        *self._args, **self._kwargs))
        self._model.add(Lambda(lambda x: x[:,0]))
        self._model.summary()
        super(Conv1DTranspose, self).build(input_shape)

    def call(self, x):
        """Run the wrapped inner model (built in ``build``) on *x*."""
        outputs = self._model(x)
        return outputs

    def compute_output_shape(self, input_shape):
        """Delegate the output-shape computation to the inner model."""
        shape = self._model.compute_output_shape(input_shape)
        return shape

# Size of the encoded (bottleneck) representation.
# NOTE(review): encoding_dim is unused in the visible span, and the
# "784 floats" remark is inherited from the MNIST autoencoder tutorial,
# not this audio input -- verify downstream usage.
encoding_dim = 32  # 32 floats -> compression of factor 24.5, assuming the input is 784 floats

# Pin to the first GPU.
os.environ["CUDA_VISIBLE_DEVICES"]="0"
# Labels are discarded: this fragment builds an autoencoder.
(x_train, _), (x_test, _) = load_audio("nsynth", 10, framerate=16384, forceLoad=True)
samples = x_train.shape[1]
print(samples)
# channels and folder_name are presumably consumed past this fragment.
channels = 1
input_shape = (x_train.shape[1],1)
kernel_size = 5
folder_name = "nsynthnormalized"
# this is our input placeholder
input_clip = Input(shape=(samples,1,))
# "encoded" is the encoded representation of the input

# How many stride-2 convolutions fit before the sequence collapses.
convolution_layers = count_convolutions(input_shape, kernel_size)

# Encoder: stack of stride-2 Conv1D layers halving the length each time.
encoded = Conv1D(16, kernel_size=kernel_size, activation='selu', strides=2, input_shape=input_shape, padding="same")(input_clip)
for i in range(convolution_layers):
    encoded = Conv1D(16, kernel_size=kernel_size, activation='selu', strides=2,padding="same")(encoded)
# ---- Example #5 (scrape-artifact separator; original read "Пример #5" / "0") ----
from audio_tools import count_convolutions
from audio_loader import load_audio
import keras
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv1D
from playsound import save_sound

import os
# Pin to the first GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

num_classes = 10

# Load the NSynth dataset at 16384 Hz, reusing any cached copy
# (forceLoad=False).
(x_train, y_train), (x_test, y_test) = load_audio("nsynth",
                                                  num_classes,
                                                  forceLoad=False,
                                                  framerate=16384)

batch_size = 30
epochs = 50
kernel_size = 5

# Write the loaded clips out as audio files for manual inspection.
save_sound(x_train, "classification", "xtrain", upscale=False)
save_sound(x_test, "classification", "xtest", upscale=False)

# One input channel per audio sample.
input_shape = (x_train.shape[1], 1)
convolution_layers = count_convolutions(input_shape, kernel_size)

# Convolutional classifier; layer definitions continue past this fragment.
model = keras.models.Sequential()
model.add(
    Conv1D(16,
           kernel_size=kernel_size,

def next_batch(num, data, labels):
    """Draw a random mini-batch of *num* examples without replacement.

    Shuffles the index range of *data*, keeps the first *num* indices,
    and returns the corresponding examples and labels as a pair of
    numpy arrays (keeping each example aligned with its label).
    """
    order = np.arange(0, len(data))
    np.random.shuffle(order)
    chosen = order[:num]
    batch_data = [data[j] for j in chosen]
    batch_labels = [labels[j] for j in chosen]
    return np.asarray(batch_data), np.asarray(batch_labels)


nClasses = 10

# Load speech_commands without reshaping: 2-D (examples, samples) arrays
# feed the dense placeholders below.
(x_train, y_train), (x_test, y_test) = load_audio("speech_commands",
                                                  nClasses,
                                                  reshape=False)

# NOTE(review): TensorFlow 1.x graph-mode API (InteractiveSession,
# placeholders, tf.layers) -- removed in TF 2.x.
sess = tf.InteractiveSession()

#declare input placeholders to which to upload data
tfX = tf.placeholder(dtype=tf.float32, shape=[None, x_train.shape[1]])
tfY = tf.placeholder(dtype=tf.float32, shape=[None, nClasses])

#build model
layer = tf.layers.dense(tfX, 64, activation=tf.nn.selu)
# Final 10-unit logits layer; softmax is folded into the loss below.
layer = tf.layers.dense(layer, 10)
loss = tf.losses.softmax_cross_entropy(tfY, layer)
optimizer = tf.train.AdamOptimizer()
# Training op; presumably run via sess.run past this fragment.
optimize = optimizer.minimize(loss)