示例#1
0
    def create_net(self):
        """Build and compile the thin-ResNet34 speaker network.

        Uses the project-local ``resnet_2D_v1`` trunk, adds an average-pool +
        flatten + dense bottleneck head, appends the configured final layers,
        and compiles with Adam hyper-parameters taken from the active wandb
        run.

        Returns:
            The compiled keras ``Model``.
        """
        from .vgg_thin import resnet_2D_v1
        import keras
        from keras.layers import AveragePooling2D, Flatten
        from keras.models import Model, Sequential

        bottleneck_dim = 512
        l2_regularization = 0.01

        # Thin ResNet34 trunk: returns the network input tensor and the last
        # feature map.
        inputs, x = resnet_2D_v1(self.input, mode='train')

        # NOTE(review): global side effect — this switches keras to
        # channels_first AFTER the trunk above was already built; confirm the
        # ordering is intentional.
        from keras import backend
        backend.set_image_data_format('channels_first')

        # NOTE(review): the original code built a (7, 1) bottleneck Conv2D
        # ('x_fc') here but never wired it into the output path, so it was not
        # part of the compiled Model; removed as dead code. Presumably
        # 'avg_pool' was meant to consume it — TODO confirm against the
        # reference VGG/thin-ResNet implementation.

        x = AveragePooling2D((1, 5), strides=(1, 1), name='avg_pool')(x)
        x = Flatten()(x)

        x = keras.layers.Dense(bottleneck_dim, activation='relu',
                               kernel_initializer='orthogonal',
                               use_bias=True, trainable=True,
                               kernel_regularizer=keras.regularizers.l2(l2_regularization),
                               bias_regularizer=keras.regularizers.l2(l2_regularization),
                               name='fc6')(x)

        # Final (loss-specific) layers come from the project config.
        dense_model = Sequential()
        add_final_layers(dense_model, self.config)
        x = dense_model(x)

        model = Model(inputs, x)

        # Optimizer hyper-parameters are driven by the active wandb run.
        adam = keras.optimizers.Adam(
            lr=wandb.config.learning_rate,  # 0.0001 @ VGG
            beta_1=wandb.config.beta_1,
            beta_2=wandb.config.beta_2,
            epsilon=wandb.config.epsilon,
            decay=wandb.config.decay
        )

        loss_function = get_loss(self.config)
        model.compile(loss=loss_function, optimizer=adam, metrics=['accuracy'])
        model.summary()

        return model
    def create_net(self):
        """Assemble and compile the stacked bidirectional-LSTM classifier."""
        net = Sequential()

        # Recurrent front-end: two BLSTM layers with dropout in between.
        net.add(Bidirectional(LSTM(self.n_hidden1, return_sequences=True),
                              input_shape=self.input))
        net.add(Dropout(0.50))
        net.add(Bidirectional(LSTM(self.n_hidden2)))

        # Dense classification head, scaled by the number of speakers.
        net.add(Dense(self.n_speakers * 10))
        net.add(Dropout(0.25))
        net.add(Dense(self.n_speakers * 5))
        add_final_layers(net, self.config)

        optimizer = keras.optimizers.Adam(self.adam_lr, self.adam_beta_1,
                                          self.adam_beta_2, self.adam_epsilon,
                                          self.adam_decay)
        net.compile(loss=get_loss(self.config),
                    optimizer=optimizer,
                    metrics=['accuracy'])
        return net
示例#3
0
def create_embeddings(config, checkpoints, x_list, y_list, out_layer=7, seg_size=100):
    """Predict embeddings for every checkpointed network.

    For each checkpoint the saved model is reloaded, truncated at layer
    ``out_layer``, and used to predict embeddings for every entry of
    ``x_list``.

    Returns:
        A tuple ``(checkpoints, set_of_embeddings, set_of_speakers,
        set_of_num_embeddings, set_of_total_times)`` with one entry per
        checkpoint in each of the four result lists.
    """
    set_of_embeddings = []
    set_of_speakers = []
    set_of_num_embeddings = []
    set_of_total_times = []

    # Loop-invariant compile settings.
    loss = get_loss(config)
    custom_objects = get_custom_objects(config)

    for checkpoint in checkpoints:
        logger.info('Run checkpoint: ' + checkpoint)

        # Restore and compile the trained network.
        model_full = load_model(get_experiment_nets(checkpoint),
                                custom_objects=custom_objects)
        model_full.compile(loss=loss, optimizer='adadelta', metrics=['accuracy'])

        # Truncate the network at the embedding layer and predict.
        model_partial = Model(inputs=model_full.input,
                              outputs=model_full.layers[out_layer].output)

        x_cluster_list = []
        y_cluster_list = []
        for features, labels in zip(x_list, y_list):
            x_cluster_list.append(np.asarray(model_partial.predict(features)))
            y_cluster_list.append(labels)

        embeddings, speakers, num_embeddings = generate_embeddings(
            x_cluster_list, y_cluster_list, x_cluster_list[0].shape[1])

        set_of_embeddings.append(embeddings)
        set_of_speakers.append(speakers)
        set_of_num_embeddings.append(num_embeddings)

        # Total speech time per utterance list.
        set_of_total_times.append(
            TimeCalculator.calc_time_all_utterances(y_cluster_list, seg_size))

    return checkpoints, set_of_embeddings, set_of_speakers, set_of_num_embeddings, set_of_total_times
示例#4
0
def create_network_n_speakers(num_speakers, config):
    """Build and compile the pairwise-KLdiv CNN for ``num_speakers`` speakers."""
    section = 'pairwise_kldiv'
    seg_size = config.getint(section, 'seg_size')
    spectrogram_height = config.getint(section, 'spectrogram_height')

    net = Sequential()

    # Convolution block 1 (declares the input shape, channels first).
    net.add(Conv2D(filters=32,
                   kernel_size=(4, 4),
                   activation='relu',
                   input_shape=(1, seg_size, spectrogram_height),
                   data_format='channels_first'))
    net.add(BatchNormalization())
    net.add(MaxPooling2D(pool_size=(4, 4), strides=(2, 2)))

    # Convolution block 2.
    net.add(Conv2D(filters=64, kernel_size=(4, 4), activation='relu'))
    net.add(BatchNormalization())
    net.add(MaxPooling2D(pool_size=(4, 4), strides=(2, 2)))

    # Fully-connected head, scaled by the number of speakers.
    net.add(Flatten())
    net.add(Dense(units=(num_speakers * 10), activation='relu'))
    net.add(BatchNormalization())
    net.add(Dropout(rate=0.5))
    net.add(Dense(units=(num_speakers * 5), activation='relu'))
    add_final_layers(net, config)

    # Adadelta optimizer with config-driven hyper-parameters.
    optimizer = Adadelta(lr=config.getfloat(section, 'adadelta_learning_rate'),
                         rho=config.getfloat(section, 'adadelta_rho'),
                         epsilon=config.getfloat(section, 'adadelta_epsilon'),
                         decay=0.0)

    net.compile(loss=get_loss(config), optimizer=optimizer, metrics=['accuracy'])

    return net
示例#5
0
    def create_net__classification_component(self, model):
        """Append the dense classification head and build loss + optimizer.

        Returns:
            A tuple ``(model, loss_function, adam)``.
        """
        conf = self.config
        section = 'pairwise_lstm'

        model.add(Dense(conf.getint(section, 'n_dense1')))
        model.add(Dropout(0.25))
        model.add(Dense(conf.getint(section, 'n_dense2')))

        # The final (Dense) layer comes from the project helper.
        add_final_layers(model, conf)

        loss_function = get_loss(conf)

        adam = keras.optimizers.Adam(
            lr=conf.getfloat(section, 'adam_lr'),
            beta_1=conf.getfloat(section, 'adam_beta_1'),
            beta_2=conf.getfloat(section, 'adam_beta_2'),
            epsilon=conf.getfloat(section, 'adam_epsilon'),
            decay=conf.getfloat(section, 'adam_decay'))

        return model, loss_function, adam
示例#6
0
    def create_net(self):
        """Build, compile, and summarize the BLSTM speaker network."""
        net = Sequential()

        # Two bidirectional LSTM layers with dropout in between.
        net.add(Bidirectional(LSTM(self.n_hidden1, return_sequences=True),
                              input_shape=self.input))
        net.add(Dropout(0.50))
        net.add(Bidirectional(LSTM(self.n_hidden2)))

        # Dense head, scaled by dense_factor.
        net.add(Dense(self.dense_factor * 10))
        net.add(Dropout(0.25))
        net.add(Dense(self.dense_factor * 5))
        add_final_layers(net, self.config)

        # Fixed Adam hyper-parameters (keras defaults).
        optimizer = keras.optimizers.Adam(lr=0.001,
                                          beta_1=0.9,
                                          beta_2=0.999,
                                          epsilon=1e-08,
                                          decay=0.0)

        net.compile(loss=get_loss(self.config),
                    optimizer=optimizer,
                    metrics=['accuracy'])
        net.summary()
        return net
示例#7
0
    def get_embeddings(self):
        """Compute embeddings for every stored checkpoint of this network.

        Loads the validation train/test data, builds the data lists, then for
        each matching checkpoint file reloads the model, truncates it at the
        configured embedding layer, and predicts embeddings for every
        utterance.

        Returns:
            A tuple ``(checkpoints, set_of_embeddings, set_of_speakers,
            speaker_numbers, set_of_total_times)`` with one entry per
            checkpoint in each result list.
        """
        short_utterance = self.config.getboolean('test', 'short_utterances')
        out_layer = self.config.getint('pairwise_lstm', 'out_layer')
        seg_size = self.config.getint('pairwise_lstm', 'seg_size')
        vec_size = self.config.getint('pairwise_lstm', 'vec_size')

        logger = get_logger('lstm', logging.INFO)
        logger.info('Run pairwise_lstm test')
        logger.info('out_layer -> ' + str(out_layer))
        logger.info('seg_size -> ' + str(seg_size))
        logger.info('vec_size -> ' + str(vec_size))

        # Load and prepare train/test data
        x_train, speakers_train, s_list_train = load_test_data(
            self.get_validation_train_data())
        x_test, speakers_test, s_list_test = load_test_data(
            self.get_validation_test_data())
        x_train, speakers_train = self.prepare_data(x_train, speakers_train)
        x_test, speakers_test = self.prepare_data(x_test, speakers_test)

        x_list, y_list, s_list = create_data_lists(short_utterance, x_train,
                                                   x_test, speakers_train,
                                                   speakers_test, s_list_train,
                                                   s_list_test)

        # Prepare return values
        set_of_embeddings = []
        set_of_speakers = []
        speaker_numbers = []
        set_of_total_times = []

        # BUGFIX: the regex patterns must be raw strings — '\.' in a plain
        # string literal is an invalid escape sequence (DeprecationWarning,
        # SyntaxWarning on modern Python).
        if self.best:
            file_regex = self.name + r".*_best\.h5"
        else:
            file_regex = self.name + r".*\.h5"

        checkpoints = list_all_files(get_experiment_nets(), file_regex)

        # Loop-invariant compile settings.
        metrics = [
            'accuracy',
            'categorical_accuracy',
        ]
        loss = get_loss(self.config)
        custom_objects = get_custom_objects(self.config)
        optimizer = 'rmsprop'

        # Fill return values
        for checkpoint in checkpoints:
            logger.info('Running checkpoint: ' + checkpoint)
            # Load and compile the trained network
            network_file = get_experiment_nets(checkpoint)
            model_full = load_model(network_file,
                                    custom_objects=custom_objects)
            model_full.compile(loss=loss, optimizer=optimizer, metrics=metrics)

            # Get a Model with the embedding layer as output and predict
            model_partial = Model(inputs=model_full.input,
                                  outputs=model_full.layers[out_layer].output)

            # s_list entries are intentionally unused here; zip truncates to
            # the shortest of the three lists.
            x_cluster_list = []
            y_cluster_list = []
            for x, y, _ in zip(x_list, y_list, s_list):
                x_cluster = np.asarray(model_partial.predict(x))
                x_cluster_list.append(x_cluster)
                y_cluster_list.append(y)

            embeddings, speakers, num_embeddings = generate_embeddings(
                x_cluster_list, y_cluster_list, vec_size)

            # Fill the embeddings and speakers into the arrays
            set_of_embeddings.append(embeddings)
            set_of_speakers.append(speakers)
            speaker_numbers.append(num_embeddings)

            # Calculate the time per utterance
            time = TimeCalculator.calc_time_all_utterances(
                y_cluster_list, seg_size)
            set_of_total_times.append(time)

        logger.info('Pairwise_lstm test done.')
        return checkpoints, set_of_embeddings, set_of_speakers, speaker_numbers, set_of_total_times