예제 #1
0
###################
# Create datasets #
###################
# Builds the training and validation datasets and wraps each one in a
# generator of preprocessed verification batches.
# TODO replace with Kaldi
# NOTE(review): machine-specific absolute path; it has to be edited before the
# script can run on another host.
data_dir = '/home/vano/wrkdir/datasets/LibriSpeech'
# data_dir = '/home/vano/wrkdir/projects_data/sre_2019/toy_dataset'

# The training set relies on the dataset's default for `stochastic`, while the
# validation set passes stochastic=False explicitly (presumably so validation
# samples are drawn deterministically -- confirm against LibriSpeechDataset).
train = LibriSpeechDataset(data_dir, training_set, n_seconds, pad=pad)
valid = LibriSpeechDataset(data_dir,
                           validation_set,
                           n_seconds,
                           stochastic=False,
                           pad=pad)

# One preprocessor is shared by both generators. It is constructed in
# 'siamese' mode with the instance preprocessing returned by
# preprocess_instances(downsampling); the exact transformation lives in those
# helpers and is not visible here.
batch_preprocessor = BatchPreProcessor('siamese',
                                       preprocess_instances(downsampling))
# Generator expressions: a batch is only preprocessed when the consumer asks
# for the next item. yield_verification_batches(batchsize) itself is called
# right here, when the generator expression is created.
train_generator = (batch_preprocessor(batch)
                   for batch in train.yield_verification_batches(batchsize))
valid_generator = (batch_preprocessor(batch)
                   for batch in valid.yield_verification_batches(batchsize))

################
# Define model #
################
# Encoder that is handed to build_siamese_net below; its layers are defined in
# get_baseline_convolutional_encoder (not visible here).
encoder = get_baseline_convolutional_encoder(filters,
                                             embedding_dimension,
                                             dropout=dropout)
# The network is built for inputs of shape (input_length, 1) -- presumably
# input_length samples of single-channel audio; TODO confirm. The
# 'uniform_euclidean' distance metric is interpreted by build_siamese_net.
siamese = build_siamese_net(encoder, (input_length, 1),
                            distance_metric='uniform_euclidean')
# Assuming this is the Keras Adam optimizer, clipnorm=1. clips gradients to a
# maximum norm of 1.
opt = Adam(clipnorm=1.)
siamese.compile(loss='binary_crossentropy',
예제 #2
0
        self.batch_to_index = {
            i: self.underlying_indexes[i * batchsize:(i + 1) * batchsize]
            for i in range(len(self))
        }


def label_preprocessor(num_classes, speaker_id_mapping):
    """Build a callable that converts a batch of speaker ids to class targets.

    Args:
        num_classes: Number of classes, forwarded unchanged to
            ``to_categorical``.
        speaker_id_mapping: Lookup (indexed with ``[]``) from a raw speaker
            id to the class index used by the classifier.

    Returns:
        A function that takes an array ``y`` indexable as ``y[:, 0]``, maps
        every entry of that first column through ``speaker_id_mapping``,
        arranges the results as a column and returns
        ``to_categorical(column, num_classes)``.

    Raises:
        KeyError: From the returned function, if ``speaker_id_mapping`` is a
            dict and a speaker id in ``y`` is missing from it.
    """
    def label_preprocessor_(y):
        # Only the first column of ``y`` carries the speaker ids.
        raw_ids = y[:, 0]
        class_indices = np.array(
            [speaker_id_mapping[raw_id] for raw_id in raw_ids])
        # Add a trailing axis so the indices form a column before encoding.
        column = class_indices[:, np.newaxis]
        return to_categorical(column, num_classes)

    return label_preprocessor_


# Preprocessor for classification batches: it is constructed in 'classifier'
# mode with the instance preprocessing from preprocess_instances(downsampling)
# and a label callable from label_preprocessor, which maps raw speaker ids to
# class indices and encodes them with to_categorical.
batch_preprocessor = BatchPreProcessor(
    'classifier', preprocess_instances(downsampling),
    label_preprocessor(train.num_classes(), speaker_id_mapping))

# Wraps the training dataset, the preprocessor and the batch size in a
# BatchedSequence (presumably a keras.utils.Sequence subclass -- confirm
# against its definition).
train_generator = BatchedSequence(train, batch_preprocessor, batchsize)

################
# Define model #
################
# Start from the baseline convolutional encoder. The third positional argument
# is the (input_length, 1) tuple -- presumably the model's input shape; TODO
# confirm against get_baseline_convolutional_encoder's signature.
classifier = get_baseline_convolutional_encoder(filters, embedding_dimension,
                                                (input_length, 1))
# Add output classification layer: one softmax unit per class reported by the
# training dataset.
classifier.add(Dense(train.num_classes(), activation='softmax'))

# Assuming this is the Keras Adam optimizer, clipnorm=1. clips gradients to a
# maximum norm of 1.
opt = Adam(clipnorm=1.)
classifier.compile(loss='categorical_crossentropy',
                   optimizer=opt,