Example #1
###################
# Create datasets #
###################
# TODO replace with Kaldi
data_dir = '/home/vano/wrkdir/datasets/LibriSpeech'
# data_dir = '/home/vano/wrkdir/projects_data/sre_2019/toy_dataset'

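# Build the train/validation datasets; stochastic=False keeps validation
# sampling deterministic so metrics are comparable between runs.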
train = LibriSpeechDataset(data_dir, training_set, n_seconds, pad=pad)
valid = LibriSpeechDataset(data_dir,
                           validation_set,
                           n_seconds,
                           stochastic=False,
                           pad=pad)

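# Preprocess each verification batch (downsampling the raw audio) as it is
# drawn from the generators below.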
batch_preprocessor = BatchPreProcessor('siamese',
                                       preprocess_instances(downsampling))
train_generator = (batch_preprocessor(batch)
                   for batch in train.yield_verification_batches(batchsize))
valid_generator = (batch_preprocessor(batch)
                   for batch in valid.yield_verification_batches(batchsize))

################
# Define model #
################
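# The encoder maps an (input_length, 1) audio fragment to an embedding; the
# siamese wrapper scores pairs of fragments by the distance between their
# embeddings.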
encoder = get_baseline_convolutional_encoder(filters,
                                             embedding_dimension,
                                             dropout=dropout)
siamese = build_siamese_net(encoder, (input_length, 1),
                            distance_metric='uniform_euclidean')
opt = Adam(clipnorm=1.)
siamese.compile(loss='binary_crossentropy',
                optimizer=opt,
                metrics=['accuracy'])
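
A training call along these lines would typically consume the generators above; steps_per_epoch, epochs and validation_steps are illustrative placeholders, not values from the source:

# Illustrative only: step counts and the epoch budget are placeholders.
siamese.fit_generator(train_generator,
                      steps_per_epoch=250,
                      epochs=50,
                      validation_data=valid_generator,
                      validation_steps=100)
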
Example #2
    # on_epoch_end of the BatchedSequence used below (excerpt starts mid-class)
    def on_epoch_end(self):
        # Shuffle the indexes so batch membership changes between epochs
        np.random.shuffle(self.underlying_indexes)
        self.batch_to_index = {
            i: self.underlying_indexes[i * batchsize:(i + 1) * batchsize]
            for i in range(len(self))
        }


def label_preprocessor(num_classes, speaker_id_mapping):
    def label_preprocessor_(y):
        y = np.array([speaker_id_mapping[i] for i in y[:, 0]])[:, np.newaxis]
        return to_categorical(y, num_classes)

    return label_preprocessor_


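# For the classifier task the preprocessor also maps raw speaker IDs to
# contiguous class indexes and one-hot encodes them.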
batch_preprocessor = BatchPreProcessor(
    'classifier', preprocess_instances(downsampling),
    label_preprocessor(train.num_classes(), speaker_id_mapping))

train_generator = BatchedSequence(train, batch_preprocessor, batchsize)
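
# For context, a minimal sketch of a keras.utils.Sequence with the interface
# used above. This is an assumption about the shape of BatchedSequence, not
# the project's actual implementation; it assumes numpy is imported as np and
# that the dataset supports indexing by an array of indexes.
from keras.utils import Sequence

class BatchedSequenceSketch(Sequence):
    def __init__(self, dataset, batch_preprocessor, batchsize):
        self.dataset = dataset
        self.batch_preprocessor = batch_preprocessor
        self.batchsize = batchsize
        self.underlying_indexes = np.arange(len(dataset))
        # Map batch number -> the dataset indexes making up that batch
        self.batch_to_index = {
            i: self.underlying_indexes[i * batchsize:(i + 1) * batchsize]
            for i in range(len(self))
        }

    def __len__(self):
        # Number of whole batches per epoch
        return len(self.dataset) // self.batchsize

    def __getitem__(self, item):
        # Fetch and preprocess the samples belonging to this batch
        batch = self.dataset[self.batch_to_index[item]]
        return self.batch_preprocessor(batch)

    def on_epoch_end(self):
        # Reshuffle so batch membership changes between epochs
        np.random.shuffle(self.underlying_indexes)
        self.batch_to_index = {
            i: self.underlying_indexes[i * self.batchsize:(i + 1) * self.batchsize]
            for i in range(len(self))
        }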

################
# Define model #
################
classifier = get_baseline_convolutional_encoder(filters, embedding_dimension,
                                                (input_length, 1))
# Add output classification layer
classifier.add(Dense(train.num_classes(), activation='softmax'))

opt = Adam(clipnorm=1.)
classifier.compile(loss='categorical_crossentropy',
                   optimizer=opt,
                   metrics=['accuracy'])
plot_model(classifier, show_shapes=True, to_file=PATH + '/plots/classifier.png')

Example #3
#################
# Training Loop #
#################
for fragment_length in n_seconds:
    print('*' * 23)
    print('***** {:.1f} seconds *****'.format(fragment_length))
    print('*' * 23)
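    # Input length in samples: LibriSpeech's 16 kHz sampling rate times the
    # fragment length, reduced by the downsampling factor.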
    input_length = int(LIBRISPEECH_SAMPLING_RATE * fragment_length / downsampling)

    # Create datasets
    train = LibriSpeechDataset(training_set, fragment_length, pad=True)
    valid = LibriSpeechDataset(validation_set, fragment_length, stochastic=False, pad=True)

    batch_preprocessor = BatchPreProcessor('siamese', preprocess_instances(downsampling))
    train_generator = (batch_preprocessor(batch) for batch in train.yield_verification_batches(batchsize))
    valid_generator = (batch_preprocessor(batch) for batch in valid.yield_verification_batches(batchsize))

    for repeat in range(n_repeats):
        # Define model
        encoder = get_baseline_convolutional_encoder(model_n_filters, model_embedding_dimension, dropout=model_dropout)
        siamese = build_siamese_net(encoder, (input_length, 1), distance_metric='uniform_euclidean')
        opt = Adam(clipnorm=1.)
        siamese.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

        # Train
        param_str = 'siamese__nseconds_{}__filters_{}__embed_{}__drop_{}__r_{}'.format(fragment_length, model_n_filters,
                                                                                       model_embedding_dimension,
                                                                                       model_dropout, repeat)
        print(param_str)
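        # Illustrative continuation, not from the source: callback settings,
        # checkpoint path and step counts are placeholders. The import would
        # normally sit at the top of the script.
        from keras.callbacks import EarlyStopping, ModelCheckpoint
        callbacks = [EarlyStopping(monitor='val_loss', patience=5),
                     ModelCheckpoint('models/{}.hdf5'.format(param_str),
                                     save_best_only=True)]
        siamese.fit_generator(train_generator,
                              steps_per_epoch=500,
                              epochs=30,
                              validation_data=valid_generator,
                              validation_steps=100,
                              callbacks=callbacks)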