def load_audio_embedding_model_from_path(model_path, input_repr, embedding_size,
                                         frontend='kapre'):
    """
    Loads a model with weights at the given path.

    The model configuration (spectrogram representation and embedding
    dimensionality) is taken from the ``input_repr`` and
    ``embedding_size`` arguments — it is NOT inferred from the weight
    filename — so the weights file must correspond to the provided
    configuration.

    Parameters
    ----------
    model_path : str
        Path to model weights HDF5 (.h5) file. Must contain weights
        compatible with the architecture selected by `input_repr`.
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for audio model.
    embedding_size : 6144 or 512
        Embedding dimensionality.
    frontend : "kapre" or "librosa"
        The audio frontend to use. If frontend == 'kapre', then the kapre
        frontend will be included. Otherwise no frontend will be added
        inside the keras model.

    Returns
    -------
    model : tf.keras.Model
        Model object.
    """
    # Normalize/validate the requested frontend and input representation.
    frontend, input_repr = _validate_audio_frontend(frontend, input_repr)

    # Construct embedding model and load model weights. Warnings are
    # suppressed to hide noisy Keras messages emitted during loading.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        m = AUDIO_MODELS[input_repr](include_frontend=frontend == 'kapre')
        m.load_weights(model_path)

    # Pooling for final output embedding size
    pool_size = AUDIO_POOLING_SIZES[input_repr][embedding_size]
    y_a = MaxPooling2D(pool_size=pool_size, padding='same')(m.output)
    y_a = Flatten()(y_a)
    m = Model(inputs=m.input, outputs=y_a)
    # Record which frontend was selected so callers can tell whether the
    # model includes in-graph preprocessing (kapre) or not (librosa).
    m.frontend = frontend
    return m
def load_audio_embedding_model(input_repr, content_type, embedding_size,
                               frontend='kapre'):
    """
    Returns a model with the given characteristics. Loads the model
    if the model has not been loaded yet.

    Parameters
    ----------
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for audio model.
    content_type : "music" or "env"
        Type of content used to train embedding.
    embedding_size : 6144 or 512
        Embedding dimensionality.
    frontend : "kapre" or "librosa"
        The audio frontend to use. If frontend == 'kapre', then the kapre
        frontend will be included. Otherwise no frontend will be added
        inside the keras model.

    Returns
    -------
    model : tf.keras.Model
        Model object.
    """
    # Validate first so the weight-file lookup uses the normalized
    # representation, matching the original validate-then-lookup order.
    frontend, input_repr = _validate_audio_frontend(frontend, input_repr)
    model_path = get_audio_embedding_model_path(input_repr, content_type)

    # Delegate to the path-based loader instead of duplicating the model
    # construction / pooling logic.
    # NOTE(review): this re-runs _validate_audio_frontend inside the
    # callee; assumed idempotent — confirm against its implementation.
    return load_audio_embedding_model_from_path(
        model_path, input_repr, embedding_size, frontend=frontend)