def create_model(self):
    """Assemble the CNN and store it on ``self.model``.

    A ``Normalization`` layer is adapted on the feature component of
    ``self.network_instance.train_data``; the network is then built with the
    Keras functional API and its summary is printed.
    """
    net = self.network_instance

    # ``adapt`` computes the normalisation statistics from the training
    # features (labels are dropped by the lambda).
    normalizer = preprocessing.Normalization()
    normalizer.adapt(net.train_data.map(lambda x, _: x))

    input_ = layers.Input(shape=net.input_shape)  # (124, 129, 1)

    # Layers are applied in list order, starting from the input tensor.
    stack = [
        preprocessing.Resizing(32, 32),
        normalizer,
        layers.Conv2D(32, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(len(net.classes)),
    ]
    tensor = input_
    for layer in stack:
        tensor = layer(tensor)

    # Build the model.
    self.model = tf.keras.Model(inputs=input_, outputs=tensor)
    self.model.summary()
def __call__(self, inputs, **kwarg):
    """Turn ``inputs`` into an STFT magnitude image and classify it.

    Args:
        inputs: Signal tensor passed to ``tf.signal.stft``.
        **kwarg: Must contain ``num_of_classes`` (size of the softmax output).
            ``input_shape`` is accepted for backward compatibility but is not
            used.

    Returns:
        The output of the final softmax ``Dense`` layer.

    Raises:
        TypeError: If ``num_of_classes`` is not supplied. Previously this
            surfaced as an ``UnboundLocalError`` only after the whole network
            had been applied.
    """
    if 'num_of_classes' not in kwarg:
        raise TypeError(
            "__call__() missing required keyword argument: 'num_of_classes'")
    number_of_classes = kwarg['num_of_classes']

    spectrogram = tf.signal.stft(inputs, frame_length=255, frame_step=128)
    spectrogram = tf.abs(spectrogram)
    spectrogram = tf.expand_dims(spectrogram, -1)
    # NOTE(review): kept from the original; looks redundant for a single
    # tensor - confirm before removing.
    spectrogram = tf.stack(spectrogram)

    print("\n******************************\n")
    print(spectrogram.shape, spectrogram)
    print("\n******************************\n")

    # NOTE: every layer below is instantiated on each call, so no weights are
    # shared between separate invocations of this method.
    x = preprocessing.Resizing(32, 32)(spectrogram)
    x = preprocessing.Normalization()(x)
    x = layers.Conv2D(32, 3, activation='relu')(x)
    x = layers.Conv2D(64, 3, activation='relu')(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Dropout(0.25)(x)
    x = layers.Flatten()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    answer = layers.Dense(number_of_classes, activation='softmax')(x)
    return answer
def return_resizing_data(data):
    """Resize ``data`` to 32x32 with a Keras ``Resizing`` layer.

    A trailing channel axis is appended for the layer and removed again
    before the result is returned.
    """
    with_channel = np.expand_dims(data, axis=-1)
    resized = preprocessing.Resizing(32, 32)(with_channel)
    return np.squeeze(resized, axis=-1)
def MinimumCNN(spectrogram_ds, num_labels=2):
    """Build and compile a small CNN classifier for a spectrogram dataset.

    Args:
        spectrogram_ds: ``tf.data.Dataset`` whose elements are the input
            tensors themselves (no labels). The first element determines the
            model input shape and the whole dataset is used to adapt the
            normalisation layer.
        num_labels: Number of output logits. Defaults to 2, the value that
            was previously hard-coded.

    Returns:
        A compiled ``Sequential`` model that outputs logits.

    Raises:
        ValueError: If ``spectrogram_ds`` yields no elements, so the input
            shape cannot be inferred.
    """
    input_shape = None
    for spec in spectrogram_ds.take(1):
        input_shape = spec.shape
    if input_shape is None:
        raise ValueError(
            'spectrogram_ds is empty; cannot infer the model input shape')
    print('Input shape:', input_shape)

    # ``adapt`` computes the normalisation statistics from the data.
    norm_layer = preprocessing.Normalization()
    norm_layer.adapt(spectrogram_ds)

    model = models.Sequential([
        layers.Input(shape=input_shape),
        preprocessing.Resizing(32, 32),
        norm_layer,
        layers.Conv2D(32, 3, activation='relu'),
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        # Linear output: the loss below is configured with from_logits=True.
        layers.Dense(num_labels),
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'])
    return model
def build_cnn(resizing=(32, 32),
              conv2d1=32,
              conv2d2=64,
              dropout1=0.25,
              dropout2=0.5,
              dense=128,
              learning_rate=0.0001):
    """Create and compile a configurable two-convolution CNN.

    The input shape and the number of output units are read from the
    module-level ``X_train`` and ``y_train`` respectively.

    Args:
        resizing: ``(height, width)`` passed to the ``Resizing`` layer.
        conv2d1: Filter count of the first convolution.
        conv2d2: Filter count of the second convolution.
        dropout1: Dropout rate applied after pooling.
        dropout2: Dropout rate applied after the hidden dense layer.
        dense: Units of the hidden dense layer.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        The compiled ``keras.Sequential`` model (softmax output).
    """
    from tensorflow import keras
    from tensorflow.keras.layers.experimental import preprocessing

    shape = X_train.shape
    input_shape = (shape[1], shape[2], shape[3])
    n_outputs = len(set(y_train))

    network = keras.Sequential([
        keras.layers.Input(shape=input_shape),
        preprocessing.Resizing(resizing[0], resizing[1]),
        keras.layers.Conv2D(conv2d1, 3, activation='relu'),
        keras.layers.Conv2D(conv2d2, 3, activation='relu'),
        keras.layers.MaxPooling2D(),
        keras.layers.Dropout(dropout1),
        keras.layers.Flatten(),
        keras.layers.Dense(dense, activation='relu'),
        keras.layers.Dropout(dropout2),
        keras.layers.Dense(n_outputs, activation='softmax'),
    ])

    network.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])
    return network
def __call__(self, inputs, **kwarg):
    """Map ``inputs`` to CNN features computed on its STFT magnitude.

    The magnitude of the short-time Fourier transform gets a channel axis,
    is resized to 32x32 and passes through four
    BatchNorm -> Conv2D -> MaxPooling stages (64, 128, 192 and 256 filters)
    followed by a final BatchNorm. All layers are instantiated inside this
    call.
    """
    features = tf.abs(
        tf.signal.stft(inputs, frame_length=255, frame_step=128))
    features = tf.expand_dims(features, -1)
    features = preprocessing.Resizing(32, 32)(features)

    for n_filters in (64, 128, 192, 256):
        features = layers.BatchNormalization()(features)
        features = layers.Conv2D(n_filters, (3, 3),
                                 padding='same',
                                 activation='relu')(features)
        features = layers.MaxPooling2D((2, 2), padding='same')(features)

    return layers.BatchNormalization()(features)
def call(self, inputs, **kwargs):
    """Resize ``inputs`` by a single scale factor applied to both sides.

    The factor is derived from ``self.min_side`` and/or ``self.max_side``:
    when both are set, the smaller of the two candidate factors is used;
    when only ``min_side`` is set, the shorter side is scaled to it;
    otherwise the longer side is scaled to ``max_side``. Axes 1 and 2 of
    ``inputs`` are read as height and width.
    """
    dims = tf.shape(inputs)
    height = tf.cast(dims[1], tf.float32)
    width = tf.cast(dims[2], tf.float32)

    def _min_ratio():
        # Factor that maps the shorter side onto ``min_side``.
        return tf.cast(self.min_side, tf.float32) / tf.minimum(width, height)

    def _max_ratio():
        # Factor that maps the longer side onto ``max_side``.
        return tf.cast(self.max_side, tf.float32) / tf.maximum(width, height)

    if self.min_side is not None and self.max_side is not None:
        scale = tf.minimum(_max_ratio(), _min_ratio())
    elif self.min_side is not None:
        scale = _min_ratio()
    else:
        scale = _max_ratio()

    target_height = tf.cast(height * scale, tf.int32)
    target_width = tf.cast(width * scale, tf.int32)

    resizer = preprocessing.Resizing(height=target_height,
                                     width=target_width,
                                     interpolation=self.interpolation)
    return resizer(inputs)
def build_model(train_ds):
    """
    Build the ML model.

    Sets up the layers of a tf.keras Sequential model. The model is returned
    uncompiled.

    :param train_ds: The training dataset, used for normalisation and for
        determining the input shape (first axis of each element is dropped)
    :return: The assembled Sequential model
    """
    # The input shape is the feature shape without its leading axis.
    for features, _ in train_ds.take(1):
        input_shape = features.shape[1:]
    print(f'Input shape: {input_shape}')

    # Normalisation layer, adapted on the first 30 dataset elements only.
    normalizer = preprocessing.Normalization()
    normalizer.adapt(train_ds.take(30).map(lambda x, _: x))

    # Model layout.
    model = models.Sequential()
    for layer in (
            layers.InputLayer(input_shape=input_shape),
            preprocessing.Resizing(32, 32),
            normalizer,
            layers.Conv2D(60, 3, activation='relu'),
            layers.Flatten(),
            layers.Dense(30, activation='relu'),
            layers.Dense(2),
    ):
        model.add(layer)
    return model
def build_cnn(documents, classes, flat=False):
    """Create and compile a small CNN classifier.

    Args:
        documents: Array-like whose ``shape[1:4]`` gives the model input
            shape.
        classes: Iterable of labels; the number of distinct values sets the
            size of the softmax output.
        flat: Accepted but not used by this function.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    from tensorflow import keras
    from tensorflow.keras.layers.experimental import preprocessing

    doc_shape = documents.shape
    input_shape = (doc_shape[1], doc_shape[2], doc_shape[3])
    n_outputs = len(set(classes))

    network = keras.Sequential([
        keras.layers.Input(shape=input_shape),
        preprocessing.Resizing(32, 32),
        keras.layers.Conv2D(32, 3, activation='relu'),
        keras.layers.Conv2D(64, 3, activation='relu'),
        keras.layers.MaxPooling2D(),
        keras.layers.Dropout(0.25),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(n_outputs, activation='softmax'),
    ])

    network.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.0001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])
    return network
def _get_data_augment_layer(self):
    """Return a ``Sequential`` of random augmentations ending in a resize.

    The final ``Resizing`` layer uses ``self.IMAGE_DIM[0]`` as height and
    ``self.IMAGE_DIM[1]`` as width.
    """
    augment_steps = [
        KerasPreprocessing.RandomFlip('horizontal'),
        KerasPreprocessing.RandomRotation(0.2),
        KerasPreprocessing.RandomZoom(0.2),
        KerasPreprocessing.RandomHeight(0.2),
        KerasPreprocessing.Resizing(height=self.IMAGE_DIM[0],
                                    width=self.IMAGE_DIM[1]),
    ]
    return tf.keras.models.Sequential(augment_steps)
def stft_function(self, input_x):
    """Convert ``input_x`` to a batch-normalised STFT magnitude tensor.

    The short-time Fourier transform magnitude gets a trailing axis and is
    resized to ``RESIZE_X`` x ``RESIZE_Y`` when ``self.resize_bool`` equals
    ``True``.
    """
    x = tf.signal.stft(input_x, frame_length=255, frame_step=128)
    x = tf.abs(x)
    x = tf.expand_dims(x, -1)
    if self.resize_bool == True:
        x = preprocessing.Resizing(RESIZE_X, RESIZE_Y)(x)
    # NOTE(review): the original indentation was lost; batch normalisation is
    # assumed to run regardless of ``resize_bool`` - confirm.
    x = layers.BatchNormalization()(x)
    return x
def define_model(trial, input_shape, ds_train, num_labels):
    """Build and compile a CNN whose hyper-parameters are drawn from ``trial``.

    Args:
        trial: Object exposing ``suggest_int`` and ``suggest_float`` (the
            interface of an Optuna trial); used to sample the number of
            convolution layers, their filter counts, dropout rates and
            optional pooling.
        input_shape: Shape passed to the model's ``Input`` layer.
        ds_train: Dataset of ``(features, label)`` pairs used to adapt the
            normalisation layer.
        num_labels: Number of output logits.

    Returns:
        The compiled ``Sequential`` model. Its summary is printed.
    """
    n_layers = trial.suggest_int("n_layers", low=3, high=10, step=1)

    norm_layer = preprocessing.Normalization()
    norm_layer.adapt(ds_train.map(lambda x, _: x))

    model = models.Sequential()
    model.add(layers.Input(shape=input_shape))
    model.add(preprocessing.Resizing(32, 32))
    model.add(norm_layer)

    for i in range(n_layers):
        filter_size = trial.suggest_int("n_units_l{}".format(i),
                                        low=32,
                                        high=512)
        model.add(
            layers.Conv2D(filter_size, 3, activation='relu', padding='same'))

        dropout_value = trial.suggest_float("dropout_{}".format(i), 0.0, 0.5)
        model.add(layers.Dropout(dropout_value))

        # There's already a maxpooling layer after this loop, so don't add a
        # double one.
        if i == (n_layers - 1):
            break

        add_max_pooling = trial.suggest_int("maxpool_{}".format(i), 0, 1)
        if add_max_pooling == 1:
            model.add(layers.MaxPooling2D(padding='same'))

    model.add(layers.MaxPooling2D(padding='same'))

    dropout_value = trial.suggest_float("dropout_beforelast", 0.1, 0.5)
    model.add(layers.Dropout(dropout_value))

    model.add(layers.Flatten())

    dense_size = trial.suggest_int("n_units_last", low=64, high=512)
    model.add(layers.Dense(dense_size, activation='relu'))

    dropout_value = trial.suggest_float("dropout_last", 0.1, 0.5)
    model.add(layers.Dropout(dropout_value))

    # The output must be linear: the loss is built with from_logits=True, and
    # a ReLU here would clamp every logit to be non-negative.
    model.add(layers.Dense(num_labels))

    model.summary()

    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )
    return model
def build(self, hp):
    """Build and compile an EfficientNet-based classifier for ``hp``.

    The hyper-parameters ``version``, ``pooling`` and ``top_dropout_rate``
    are requested from ``hp`` in that order. An optional augmentation model
    is applied to the input before it is resized to the image size registered
    for the chosen EfficientNet version.

    Args:
        hp: Hyper-parameter container providing ``Choice`` and ``Float``.

    Returns:
        The model after ``self._compile`` has been applied to it.
    """
    if self.input_tensor is None:
        inputs = layers.Input(shape=self.input_shape)
        x = inputs
    else:
        inputs = tf.keras.utils.get_source_inputs(self.input_tensor)
        x = self.input_tensor

    augmenter = self.augmentation_model
    if augmenter:
        if isinstance(augmenter, hypermodel.HyperModel):
            augmentation_model = augmenter.build(hp)
        elif isinstance(augmenter, keras.models.Model):
            augmentation_model = augmenter
        x = augmentation_model(x)

    # Select one of the pre-trained EfficientNets as feature extractor.
    version_names = ["B{}".format(i) for i in range(8)]
    version = hp.Choice("version", version_names, default="B0")
    img_size = EFFICIENTNET_IMG_SIZE[version]

    resizer = preprocessing.Resizing(img_size,
                                     img_size,
                                     interpolation="bilinear")
    x = resizer(x)
    backbone = EFFICIENTNET_MODELS[version](include_top=False, input_tensor=x)

    # Rebuild the top layers of the model.
    x = backbone.output

    pooling = hp.Choice("pooling", ["avg", "max"], default="avg")
    if pooling == "avg":
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
    elif pooling == "max":
        x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    top_dropout_rate = hp.Float("top_dropout_rate",
                                min_value=0.2,
                                max_value=0.8,
                                step=0.2,
                                default=0.2)
    x = layers.Dropout(top_dropout_rate, name="top_dropout")(x)
    x = layers.Dense(self.classes, activation="softmax", name="probs")(x)

    # Compile.
    model = keras.Model(inputs, x, name="EfficientNet")
    self._compile(model, hp)
    return model
def _build_model(self) -> tf.keras.Model:
    """
    Build the model for training.

    Also prepares the datasets and stores them on the instance:
    ``_input_shape``, ``_test_ds`` (unbatched), and the batched, cached and
    prefetched ``_train_ds`` / ``_val_ds``.
    """
    # Split the samples, then preprocess every split the same way.
    splits = split_train_test_validate(self.samples)
    train_ds, val_ds, test_ds = (
        preprocess_dataset(files, self.commands) for files in splits)

    # The feature shape of the first training element is the input shape.
    first_example = next(iter(train_ds.take(1)))
    self._input_shape = first_example[0].shape

    # Batch, then configure caching and prefetching for data reads.
    batched_train = train_ds.batch(self.batch_size)
    batched_val = val_ds.batch(self.batch_size)

    self._test_ds = test_ds
    self._train_ds = batched_train.cache().prefetch(AUTOTUNE)
    self._val_ds = batched_val.cache().prefetch(AUTOTUNE)

    num_labels = len(self.commands)

    # Normalisation statistics come from the batched training features.
    normalizer = preprocessing.Normalization()
    normalizer.adapt(batched_train.map(lambda x, _: x))

    stack = [
        layers.InputLayer(input_shape=self._input_shape),
        preprocessing.Resizing(32, 32),
        normalizer,
        layers.Conv2D(32, 3, activation="relu"),
        layers.Conv2D(64, 3, activation="relu"),
        layers.MaxPooling2D(),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(num_labels),
    ]
    model = models.Sequential(stack)

    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=loss_fn,
        metrics=["accuracy"],
    )
    return model
def basic_CNN(self, data_set, input_shape, num_labels):
    '''
    Build a simple convolutional neural network (CNN) for spectrogram
    images obtained from audio files.

    Besides the convolutional stack, the model contains two preprocessing
    layers:
    a Resizing layer that downsamples the input so the model trains
    faster, and
    a Normalization layer that normalises the input using its mean and
    standard deviation.

    The Normalization layer's adapt method is called on the supplied data
    first, in order to compute those aggregate statistics.

    Parameters
    ----------
    data_set : tf.data.Dataset.from_tensor_slices
        data used for the model
    input_shape : Tensor
        Tensor indicating the shape of the input
    num_labels : int
        number of different labels

    Returns
    -------
    Keras Sequential Model

    '''
    normalizer = preprocessing.Normalization()
    features_only = data_set.map(lambda x, _: x)
    normalizer.adapt(features_only)

    network = models.Sequential()
    for layer in (
            layers.Input(shape=input_shape),
            preprocessing.Resizing(32, 32),
            normalizer,
            layers.Conv2D(32, 3, activation='relu'),
            layers.Conv2D(64, 3, activation='relu'),
            layers.MaxPooling2D(),
            layers.Dropout(0.25),
            layers.Flatten(),
            layers.Dense(128, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(num_labels),
    ):
        network.add(layer)

    network.summary()
    return network
def create_sequential_model(input_shape, norm_layer, num_labels):
    """Return an uncompiled ``Sequential`` CNN.

    ``norm_layer`` is accepted but currently not inserted into the model, and
    no ``Flatten`` layer is used; both were disabled by the original author
    (see the comments below).

    NOTE(review): without ``Flatten`` the ``Dense`` layers operate on the
    last axis of the pooled feature map, so the output keeps its spatial
    axes - confirm this is intended.
    """
    stack = [
        layers.Input(shape=input_shape),
        preprocessing.Resizing(32, 32),
        # norm_layer,  # Not sure if this is causing the issue
        layers.Conv2D(32, 3, activation='relu'),
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Dropout(0.25),
        # layers.Flatten(),  # Reshape instead??
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_labels),
    ]
    return models.Sequential(stack)
def build_model(input_shape, spectrogram_ds, num_labels):
    """Return an uncompiled CNN for spectrogram classification.

    Args:
        input_shape: Shape passed to the model's ``Input`` layer.
        spectrogram_ds: Dataset of ``(spectrogram, label)`` pairs; its
            spectrograms are used to adapt the normalisation layer.
        num_labels: Number of output units of the final ``Dense`` layer.

    Returns:
        The assembled ``Sequential`` model.
    """
    # Normalise incoming data based on the original spectrogram dataset.
    normalizer = preprocessing.Normalization()
    normalizer.adapt(spectrogram_ds.map(lambda x, _: x))

    # Default model to process audio, taken from the Google guide.
    network = models.Sequential()
    for layer in (
            layers.Input(shape=input_shape),
            preprocessing.Resizing(32, 32),
            normalizer,
            layers.Conv2D(32, 3, activation='relu'),
            layers.Conv2D(64, 3, activation='relu'),
            layers.MaxPooling2D(),
            layers.Dropout(0.25),
            layers.Flatten(),
            layers.Dense(128, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(num_labels),
    ):
        network.add(layer)
    return network
def define_model(input_shape, ds_train, num_labels, layer_sizes: List[int],
                 dropouts: List[float]):
    """Build and compile a CNN with a configurable convolution stack.

    Args:
        input_shape: Shape passed to the model's ``Input`` layer.
        ds_train: Dataset of ``(features, label)`` pairs used to adapt the
            normalisation layer.
        num_labels: Number of output logits.
        layer_sizes: Filter count of each convolution layer, in order.
        dropouts: Dropout rate applied after each convolution; must provide
            at least ``len(layer_sizes)`` entries.

    Returns:
        The compiled ``Sequential`` model (SGD with an exponentially decaying
        learning rate). Its summary is printed.
    """
    norm_layer = preprocessing.Normalization()
    norm_layer.adapt(ds_train.map(lambda x, _: x))

    model = models.Sequential()
    model.add(layers.Input(shape=input_shape))
    model.add(preprocessing.Resizing(32, 32))
    model.add(norm_layer)

    for i, n_filters in enumerate(layer_sizes):
        model.add(
            layers.Conv2D(n_filters,
                          kernel_size=3,
                          activation='relu',
                          padding='same'))
        model.add(layers.Dropout(dropouts[i]))

    # NOTE(review): the original indentation was lost; a single pooling layer
    # after the convolution stack is assumed - confirm.
    model.add(layers.MaxPooling2D(padding='same'))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation="relu"))
    # The output must be linear: the loss is built with from_logits=True, and
    # a ReLU here would clamp every logit to be non-negative.
    model.add(layers.Dense(num_labels))

    model.summary()

    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=1e-2, decay_steps=10000, decay_rate=0.9)

    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=lr_schedule),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'])
    return model
val_ds = val_ds.batch(batch_size) train_ds = train_ds.cache().prefetch(AUTOTUNE) val_ds = val_ds.cache().prefetch(AUTOTUNE) for spectrogram, _ in spectrogram_ds.take(1): input_shape = spectrogram.shape print('Input shape:', input_shape) num_labels = len(commands) norm_layer = preprocessing.Normalization() norm_layer.adapt(spectrogram_ds.map(lambda x, _: x)) model = models.Sequential([ layers.Input(shape=input_shape), preprocessing.Resizing(32, 32), norm_layer, layers.Conv2D(32, 3, activation='relu'), layers.Conv2D(64, 3, activation='relu'), layers.MaxPooling2D(), layers.Dropout(0.25), layers.Flatten(), layers.Dense(128, activation='relu'), layers.Dropout(0.5), layers.Dense(num_labels), ]) model.summary() model.compile( optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
# result = Lambda(stft_func, arguments={'frame_size':255, 'delay_size':128})(one_data) train_data.append(result) print("\r{}th file is done...".format(i + 1), end='') train_data = np.array(train_data) # result = np.expand_dims(train_data, -1) result = tf.expand_dims(train_data, -1) print(result.shape) input_data = tf.data.Dataset.from_tensor_slices(result) print(input_data) input_sig = keras.Input(shape=(504, 127, 1)) x = preprocessing.Resizing(32, 32)(input_sig) print(x.shape) x = preprocessing.Normalization()(x) x = layers.Conv2D(32, 3, activation='relu')(x) x = layers.Conv2D(64, 3, activation='relu')(x) x = layers.MaxPooling2D()(x) x = layers.Dropout(0.25)(x) x = layers.Flatten()(x) x = layers.Dense(128, activation='relu')(x) x = layers.Dropout(0.5)(x) answer = layers.Dense(7, activation='softmax')(x) model = keras.Model(inputs=input_sig, outputs=answer) model.summary()
class DataLoader():
    """Load the mini speech commands dataset as spectrogram datasets.

    On construction the archive is downloaded into ``data/`` when missing,
    the file list is shuffled with a fixed seed and split, and three datasets
    of ``(spectrogram, label_id)`` pairs are exposed as ``train_set``,
    ``val_set`` and ``test_set``. ``commands`` holds the label names; a
    label id is the index of the label in that array.
    """

    # Preprocessing layers kept as class attributes; they are not applied by
    # the current pipeline.
    resize = preprocessing.Resizing(32, 32)
    norm = preprocessing.Normalization()

    def __init__(self, n_training=6400, n_test=800) -> None:
        """Prepare the datasets.

        Args:
            n_training: Maximum number of training examples to keep (taken
                from the first 6400 shuffled files).
            n_test: Maximum number of test examples to keep (taken from the
                last 800 shuffled files).
        """
        data_dir = pathlib.Path('data/mini_speech_commands')
        if not data_dir.exists():
            tf.keras.utils.get_file(
                'mini_speech_commands.zip',
                origin=
                "http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
                extract=True,
                cache_dir='.',
                cache_subdir='data')

        commands = np.array(tf.io.gfile.listdir(str(data_dir)))
        self.commands = commands[commands != 'README.md']
        print('Commands:', self.commands)

        filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')
        filenames = tf.random.shuffle(filenames, seed=123)
        num_samples = len(filenames)
        print('Number of total examples:', num_samples)
        print('Example file tensor:', filenames[0])

        train_files = filenames[:6400]
        val_files = filenames[6400:6400 + 800]
        test_files = filenames[-800:]

        print('Training set size', len(train_files))
        print('Validation set size', len(val_files))
        print('Test set size', len(test_files))

        self.train_set = self._spectrogram_dataset(train_files).take(
            n_training)
        self.val_set = self._spectrogram_dataset(val_files)
        self.test_set = self._spectrogram_dataset(test_files).take(n_test)

    def _spectrogram_dataset(self, files):
        """Map a tensor of file paths to (spectrogram, label_id) pairs."""
        autotune = tf.data.AUTOTUNE
        files_ds = tf.data.Dataset.from_tensor_slices(files)
        waveform_ds = files_ds.map(self.get_waveform_and_label,
                                   num_parallel_calls=autotune)
        return waveform_ds.map(self.get_spectrogram_and_label_id,
                               num_parallel_calls=autotune)

    @staticmethod
    def decode_audio(audio_binary):
        """Decode WAV bytes and drop the trailing channel axis."""
        audio, _ = tf.audio.decode_wav(audio_binary)
        return tf.squeeze(audio, axis=-1)

    @staticmethod
    def get_label(file_path):
        """Return the parent directory name of ``file_path`` as the label."""
        parts = tf.strings.split(file_path, os.path.sep)
        return parts[-2]

    @staticmethod
    def get_waveform_and_label(file_path):
        """Read ``file_path`` and return ``(waveform, label)``."""
        label = DataLoader.get_label(file_path)
        audio_binary = tf.io.read_file(file_path)
        waveform = DataLoader.decode_audio(audio_binary)
        return waveform, label

    @classmethod
    def get_spectrogram(cls, waveform):
        """Return the compressed STFT magnitude of ``waveform``.

        The waveform is zero-padded to 16000 samples, the STFT magnitude is
        raised to the power 0.2 and a trailing channel axis is appended.
        """
        zero_padding = tf.zeros([16000] - tf.shape(waveform),
                                dtype=tf.float32)
        equal_length = tf.concat([waveform, zero_padding], 0)
        spectrogram = tf.signal.stft(equal_length,
                                     frame_length=255,
                                     frame_step=128)
        spectrogram = tf.math.abs(spectrogram)
        spectrogram = tf.math.pow(spectrogram, 0.2)
        spectrogram = tf.expand_dims(spectrogram, -1)
        return spectrogram

    def get_spectrogram_and_label_id(self, audio, label):
        """Return ``(spectrogram, label_id)`` for one waveform/label pair."""
        # ``get_spectrogram`` already appends the channel axis; adding a
        # second one here produced a rank-4 sample that neither
        # ``plot_spectrogram`` nor a ``Resizing`` layer can consume.
        spectrogram = DataLoader.get_spectrogram(audio)
        label_id = tf.argmax(label == self.commands)
        return spectrogram, label_id

    def visualize(self):
        """Print and plot the first training example."""
        for spectrogram, label_id in self.train_set.take(1):
            print('Label:', label_id)
            print('Spectrogram shape:', spectrogram.shape)
            fig, axes = plt.subplots(1, figsize=(12, 8))
            self.plot_spectrogram(spectrogram.numpy(), axes)
            axes.set_title('Spectrogram')
            plt.show()

    @staticmethod
    def plot_spectrogram(spectrogram, ax):
        """Draw ``spectrogram`` (with a trailing channel axis) on ``ax``."""
        # Convert to frequencies to log scale and transpose so that the time
        # is represented in the x-axis (columns).
        spectrogram = spectrogram[:, :, 0]
        # The epsilon keeps zero-valued bins from turning into -inf.
        log_spec = np.log(spectrogram.T + np.finfo(float).eps)
        height = log_spec.shape[0]
        width = log_spec.shape[1]
        X = np.linspace(0, np.size(spectrogram), num=width, dtype=int)
        Y = range(height)
        ax.pcolormesh(X, Y, log_spec)
return np.array(train_data_np) train_data_x = list() for i, one_data in enumerate(train_data_1): result = stft_func(one_data) train_data_x.append(result) print("\r{}th file is done...".format(i + 1), end='') train_label = np.array(train_data_x) train_label = np.expand_dims(train_label, axis=-1) train_label = preprocessing.Resizing(32, 32)(train_label) # print(train_label) train_label = train_label.numpy() # train_data_1 = tf.data.Dataset.from_tensor_slices(train_data_1) # train_label = tf.data.Dataset.from_tensor_slices(train_label) train_data = tf.data.Dataset.from_tensor_slices( (train_data_1, train_label)).shuffle(5000).batch(4) # print(train_data) import time input_sig = keras.Input(shape=(64000, ))
def run_whole_thing(out_dir):
    """Download, preprocess, train and evaluate a speech-command classifier.

    The mini speech commands dataset is fetched into ``data/`` when missing,
    converted to spectrograms, used to train a small CNN for up to 10 epochs
    (with early stopping), and the test accuracy is printed.

    Args:
        out_dir: Directory that is created if it does not exist. Nothing is
            written into it by this function.
    """
    os.makedirs(out_dir, exist_ok=True)

    # Set seed for experiment reproducibility
    seed = 55
    tf.random.set_seed(seed)
    np.random.seed(seed)

    data_dir = pathlib.Path("data/mini_speech_commands")
    if not data_dir.exists():
        # Get the files from the external source. ``cache_dir`` and
        # ``cache_subdir`` make the archive extract under ./data, which is
        # where ``data_dir`` points; without them the files land in the
        # default Keras cache and the listing below cannot find them.
        tf.keras.utils.get_file(
            'mini_speech_commands.zip',
            origin=
            "http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
            extract=True,
            cache_dir='.',
            cache_subdir='data')

    # Convert the binary audio file to a tensor
    def decode_audio(audio_binary):
        audio, _ = tf.audio.decode_wav(audio_binary)
        return tf.squeeze(audio, axis=-1)

    # Get the label (yes, no, up, down, etc) for an audio file.
    def get_label(file_path):
        parts = tf.strings.split(file_path, os.path.sep)
        # Note: You'll use indexing here instead of tuple unpacking to enable
        # this to work in a TensorFlow graph.
        return parts[-2]

    # Create a tuple that has the labeled audio files
    def get_waveform_and_label(file_path):
        label = get_label(file_path)
        audio_binary = tf.io.read_file(file_path)
        waveform = decode_audio(audio_binary)
        return waveform, label

    # Convert audio files to images
    def get_spectrogram(waveform):
        # Padding for files with less than 16000 samples
        zero_padding = tf.zeros([16000] - tf.shape(waveform),
                                dtype=tf.float32)
        # Concatenate audio with padding so that all audio clips will be of
        # the same length
        waveform = tf.cast(waveform, tf.float32)
        equal_length = tf.concat([waveform, zero_padding], 0)
        spectrogram = tf.signal.stft(equal_length,
                                     frame_length=255,
                                     frame_step=128)
        spectrogram = tf.abs(spectrogram)
        return spectrogram

    # Label the images created from the audio files and return a tuple.
    # ``commands`` is resolved from the enclosing scope at call time.
    def get_spectrogram_and_label_id(audio, label):
        spectrogram = get_spectrogram(audio)
        spectrogram = tf.expand_dims(spectrogram, -1)
        label_id = tf.argmax(label == commands)
        return spectrogram, label_id

    # Preprocess any audio files
    def preprocess_dataset(files, autotune, commands):
        # Creates the dataset
        files_ds = tf.data.Dataset.from_tensor_slices(files)
        # Matches audio files with correct labels
        output_ds = files_ds.map(get_waveform_and_label,
                                 num_parallel_calls=autotune)
        # Matches audio file images to the correct labels
        output_ds = output_ds.map(get_spectrogram_and_label_id,
                                  num_parallel_calls=autotune)
        return output_ds

    # Get all of the commands for the audio files
    commands = np.array(tf.io.gfile.listdir(str(data_dir)))
    commands = commands[commands != 'README.md']

    # Get a list of all the files in the directory
    filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')
    # Shuffle the file names so that random bunches can be used as the
    # training, testing, and validation sets
    filenames = tf.random.shuffle(filenames)

    train_files = filenames[:6400]
    validation_files = filenames[6400:6400 + 800]
    test_files = filenames[-800:]

    autotune = tf.data.AUTOTUNE

    # Unbatched training spectrograms: used for the input shape and for
    # adapting the normalisation layer.
    spectrogram_ds = preprocess_dataset(train_files, autotune, commands)

    # Preprocess the training, test, and validation datasets
    train_ds = preprocess_dataset(train_files, autotune, commands)
    validation_ds = preprocess_dataset(validation_files, autotune, commands)
    test_ds = preprocess_dataset(test_files, autotune, commands)

    # Batch datasets for training and validation
    batch_size = 64
    train_ds = train_ds.batch(batch_size)
    validation_ds = validation_ds.batch(batch_size)

    # Reduce latency while training
    train_ds = train_ds.cache().prefetch(autotune)
    validation_ds = validation_ds.cache().prefetch(autotune)

    # Build model
    for spectrogram, _ in spectrogram_ds.take(1):
        input_shape = spectrogram.shape
    num_labels = len(commands)

    norm_layer = preprocessing.Normalization()
    norm_layer.adapt(spectrogram_ds.map(lambda x, _: x))

    model = models.Sequential([
        layers.Input(shape=input_shape),
        preprocessing.Resizing(32, 32),
        norm_layer,
        layers.Conv2D(32, 3, activation='relu'),
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D(),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_labels),
    ])

    model.summary()

    # Configure built model with losses and metrics
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )

    # Finally train the model
    EPOCHS = 10
    model.fit(
        train_ds,
        validation_data=validation_ds,
        epochs=EPOCHS,
        callbacks=[tf.keras.callbacks.EarlyStopping(verbose=1, patience=2)],
    )

    # Collect the (unbatched) test set into arrays
    test_audio = []
    test_labels = []
    for audio, label in test_ds:
        test_audio.append(audio.numpy())
        test_labels.append(label.numpy())

    test_audio = np.array(test_audio)
    test_labels = np.array(test_labels)

    # See how accurate the model is when making predictions on the test
    # dataset
    y_pred = np.argmax(model.predict(test_audio), axis=1)
    y_true = test_labels

    test_acc = sum(y_pred == y_true) / len(y_true)
    print(f'Test set accuracy: {test_acc:.0%}')
temp_size += tensor_gap_size x7 = tf.slice(x, begin=[0, temp_size, 0], size=[-1, tmp_shape[-2] - temp_size, tmp_shape[-1]]) x0 = tf.expand_dims(x0, -1) x1 = tf.expand_dims(x1, -1) x2 = tf.expand_dims(x2, -1) x3 = tf.expand_dims(x3, -1) x4 = tf.expand_dims(x4, -1) x5 = tf.expand_dims(x5, -1) x6 = tf.expand_dims(x6, -1) x7 = tf.expand_dims(x7, -1) x0 = preprocessing.Resizing(32, 32)(x0) x1 = preprocessing.Resizing(32, 32)(x1) x2 = preprocessing.Resizing(32, 32)(x2) x3 = preprocessing.Resizing(32, 32)(x3) x4 = preprocessing.Resizing(32, 32)(x4) x5 = preprocessing.Resizing(32, 32)(x5) x6 = preprocessing.Resizing(32, 32)(x6) x7 = preprocessing.Resizing(32, 32)(x7) cnn_block_0 = CNN_block(channel_size=cnn_chan_size) cnn_block_1 = CNN_block(channel_size=cnn_chan_size) cnn_block_2 = CNN_block(channel_size=cnn_chan_size) cnn_block_3 = CNN_block(channel_size=cnn_chan_size) cnn_block_4 = CNN_block(channel_size=cnn_chan_size) cnn_block_5 = CNN_block(channel_size=cnn_chan_size) cnn_block_6 = CNN_block(channel_size=cnn_chan_size)
batch_y_train[current_size] = partsClear[k*voice_max_length:(k+1)*voice_max_length] current_size+=1 if current_size>=batch_size: break return batch_x_train, batch_y_train print('Build model...') if os.path.exists(model_name): print("Load: " + model_name) model = load_model(model_name) else: main_input = Input(shape=(voice_max_length, image_width, int(frame_length/2+1)), name='main_input') x = main_input x = TimeDistributed(Reshape((image_width, int(frame_length/2+1), 1)))(x) x = TimeDistributed(preprocessing.Resizing(image_width//2, int(frame_length/2+1)//2))(x) x = TimeDistributed(Conv2D(34, 3, activation='relu'))(x) x = TimeDistributed(Conv2D(64, 3, activation='relu'))(x) x = TimeDistributed(MaxPooling2D())(x) x = TimeDistributed(Dropout(0.1))(x) x = TimeDistributed(Flatten())(x) x = LSTM(256, activation='tanh', recurrent_activation='sigmoid', return_sequences=True)(x) x = Dense(int(frame_length/2+1), activation='sigmoid')(x) x = Reshape((voice_max_length, 1, int(frame_length/2+1)))(x) x = Multiply()([x, main_input]) model = Model(inputs=main_input, outputs=x) tf.keras.utils.plot_model(model, to_file='model_lstm_image.png', show_shapes=True) model.compile(loss='mse', metrics='mse', optimizer='adam')#Adam, SGD, Adagrad print('Train...') history = model.fit(MySequence(x_train, x_train_count, batch_size), epochs=epochs, steps_per_epoch=x_train_count//batch_size)
if current_size >= batch_size: break return batch_x_train, batch_y_train print('Build model...') if os.path.exists(model_name): print("Load: " + model_name) model = load_model(model_name) else: main_input = Input(shape=(image_width, int(frame_length / 2 + 1)), name='main_input') x = main_input x = Reshape((image_width, int(frame_length / 2 + 1), 1))(x) x = preprocessing.Resizing(image_width // 2, int(frame_length / 2 + 1) // 2)(x) x = Conv2D(34, 3, activation='relu')(x) x = Conv2D(64, 3, activation='relu')(x) x = MaxPooling2D()(x) x = Dropout(0.1)(x) x = Flatten()(x) x = Dense(int(frame_length / 2 + 1), activation='sigmoid')(x) x = Multiply()([x, main_input]) model = Model(inputs=main_input, outputs=x) tf.keras.utils.plot_model(model, to_file='model_dense_image.png', show_shapes=True) model.compile(loss='mse', metrics='mse', optimizer='adam') print('Train...') history = model.fit(MySequence(x_train, x_train_count, batch_size),
val_ds = val_ds.batch(batch_size) train_ds = train_ds.cache().prefetch(AUTOTUNE) val_ds = val_ds.cache().prefetch(AUTOTUNE) for spectrogram, _ in spectrogram_ds.take(1): input_shape = spectrogram.shape print('Input shape:', input_shape) num_labels = len(labels) norm_layer = preprocessing.Normalization() norm_layer.adapt(spectrogram_ds.map(lambda x, _: x)) model = models.Sequential([ layers.Input(shape=input_shape), preprocessing.Resizing(64, 64), norm_layer, layers.Conv2D(64, 5, activation='relu'), layers.Conv2D(128, 3, activation='relu'), layers.MaxPooling2D(), layers.Dropout(0.25), layers.Flatten(), layers.Dense(128, activation='relu'), layers.Dropout(0.2), layers.Dense(num_labels), ]) model.summary() model.compile( optimizer=tf.keras.optimizers.Adam(),
# result = tf.expand_dims(result, 0) # if i == 0: # a = tf.zeros(result.shape) # train_data = tf.raw_ops.Add(x=a, y=result) # else: # train_data = tf.concat([train_data, result], axis=0) # print("\r{}th file is done...".format(i+1), end='') result = np.expand_dims(train_data, -1) # result = tf.raw_ops.ExpandDims(train_data, -1) print(result.shape) x = preprocessing.Resizing(32, 32)(result) print(x.shape) x = preprocessing.Normalization()(x) x = layers.Conv2D(32, 3, activation='relu')(x) x = layers.Conv2D(64, 3, activation='relu')(x) x = layers.MaxPooling2D()(x) x = layers.Dropout(0.25)(x) x = layers.Flatten()(x) x = layers.Dense(128, activation='relu')(x) x = layers.Dropout(0.5)(x) answer = layers.Dense(7, activation='softmax')(x) model = keras.Model(inputs=input_sigss, outputs=answer) model.summary()
compression='gzip') f.create_dataset('spectr_test',data=spectr_test, compression='gzip') f.create_dataset('label_test',data=label_test, compression='gzip') f.close() print('file size: %s'%list(os.stat(h5f))[6]) for spectrogram,_ in train_ds.take(1): input_shape=spectrogram.shape num_labels=len(names) norm_layer=tkp.Normalization() norm_layer.adapt(train_ds.map(lambda x,_:x)) model=tkm.Sequential([ tkl.InputLayer(input_shape=input_shape), tkp.Resizing(32,32), norm_layer, tkl.Conv2D(32,3,activation='relu'), tkl.Conv2D(96,3,activation='relu'), tkl.MaxPooling2D(), tkl.Dropout(.25), tkl.Flatten(), tkl.Dense(256,activation='relu'), tkl.Dropout(.5), tkl.Dense(num_labels), ]) model.summary() model.compile( optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
# #print("\n******************************\n") #print(spectrogram.shape) #print(spectrogram) #print("\n******************************\n") # x = tf.abs(input_vec) # x = tf.expand_dims(x, -1) # x = layers.Conv1D(32, 3, activation='relu')(x) # x = layers.Conv1D(64, 3, activation='relu')(x) # spectrogram = tf.expand_dims(x, -1) # input_vec = tf.keras.Input(shape=(spectrogram.shape[1], spectrogram.shape[2], 1)) # x = preprocessing.Resizing(64, 64)(input_vec) x = preprocessing.Resizing(32, 32)(spectrogram) x = preprocessing.Normalization()(x) # x = preprocessing.Normalization()(x) # x = layers.Conv2D(32, 3, activation='relu')(input_vec) x = layers.Conv2D(32, 3, activation='relu')(x) # x = layers.Conv2D(32, 3, activation='relu')(x) # x = layers.MaxPooling2D()(x) # x = layers.Conv2D(64, 3, activation='relu')(x) x = layers.Conv2D(64, 3, activation='relu')(x) x = layers.MaxPooling2D()(x) x = layers.Dropout(0.25)(x) x = layers.Flatten()(x) x = layers.Dense(128, activation='relu')(x) x = layers.Dropout(0.5)(x) answer = layers.Dense(6, activation='softmax')(x)