Example #1
    # positional CLI arguments; the assignments for sys.argv[1..4]
    # (train_data, val_data, utt2spk, pdfs, cf. Example #2) are cut off above
    left_context = int(sys.argv[5])
    right_context = int(sys.argv[6])
    lda_path = sys.argv[7]
    output_path = sys.argv[8]

    num_epochs = 400
    batch_size = 256
    learning_rate = 0.0015
    model_type = 'SAT-LHUC'

    # speaker key = the part of the id between its first and last hyphen
    utt_to_spk = load_utt_to_spk(utt2spk, lambda x: "".join(x.split("-")[1:-1]))
    utt_to_pdfs = load_utt_to_pdfs(pdfs)
    num_spks = max(utt_to_spk.values()) + 1
    print("num_spks = %d" % num_spks)

    # half of the training chunks are served speaker-independent (SAT-LHUC training)
    train_dataset = load_dataset(train_data, utt_to_spk, utt_to_pdfs, chunk_size=8, subsampling_factor=1, left_context=left_context, right_context=right_context, speaker_independent_prob=0.5)
    train_dataset = train_dataset.batch(batch_size, drop_remainder=True)
    train_dataset = train_dataset.prefetch(1024)
    x, spk, y = train_dataset.make_one_shot_iterator().get_next()

    # validation always runs speaker-independent; cache 512 batches and loop over them
    val_dataset = load_dataset(val_data, utt_to_spk, utt_to_pdfs, chunk_size=8, subsampling_factor=1, left_context=left_context, right_context=right_context, speaker_independent_prob=1.0)
    val_dataset = val_dataset.batch(batch_size, drop_remainder=True)
    val_dataset = val_dataset.take(512).cache().repeat()
    val_x, val_spk, val_y = val_dataset.make_one_shot_iterator().get_next()

    if model_type == 'SAT-LHUC':
        model = create_sat_model(600, lda_path, num_spks)
    elif model_type == 'SAT-SPARSE-LHUC':
        # second argument is the sparsity penalty weight, presumably normalised
        # by layer width (850) and batch size
        model = create_sparse_lhuc_sat_model(850, 0.1 / 850 / batch_size, lda_path, num_spks)

    model.compile(
        # the snippet is truncated here; the arguments below mirror Example #2's
        # compile call and are an assumption, not the original code
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
        optimizer=Adam(lr=learning_rate, amsgrad=True, clipvalue=1.)
    )
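
Both examples build the utterance-to-speaker mapping with load_utt_to_spk. Example #1 additionally passes a key function that keeps everything between the first and last hyphen of the id it is handed. A minimal demonstration of that string surgery, using a hypothetical Kaldi-style id (the actual id layout is an assumption):

    # hypothetical id; the real corpus naming scheme is not shown in the snippet
    raw_id = "swbd-sp1005-utt00042"
    spk_key = "".join(raw_id.split("-")[1:-1])
    print(spk_key)  # -> "sp1005"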
Example #2
    # positional CLI arguments; the assignments for sys.argv[1] and sys.argv[2]
    # (train_data, val_data) are cut off above
    utt2spk = sys.argv[3]
    pdfs = sys.argv[4]
    left_context = int(sys.argv[5])
    right_context = int(sys.argv[6])
    output_path = sys.argv[7]

    # 1 epoch is roughly 25270800 * 0.97 = 24512676 frames,
    # i.e. roughly 61 iterations of 400000 frames each
    num_iterations = 61 * 6
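    # => 61 * 6 iterations * 400000 frames/iteration = 146.4M frames, about 6 epochs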
    batch_size = 256
    learning_rate = 0.0015

    utt_to_spk = load_utt_to_spk(utt2spk)
    utt_to_pdfs = load_utt_to_pdfs(pdfs)

    train_dataset = load_dataset(train_data, utt_to_spk, utt_to_pdfs, chunk_size=1, subsampling_factor=1, left_context=left_context, right_context=right_context)
    train_dataset = train_dataset.batch(batch_size, drop_remainder=True)
    train_dataset = train_dataset.prefetch(512)
    x, _, y = train_dataset.make_one_shot_iterator().get_next()

    val_dataset = load_dataset(val_data, utt_to_spk, utt_to_pdfs, chunk_size=1, subsampling_factor=1, left_context=left_context, right_context=right_context)
    val_dataset = val_dataset.batch(batch_size, drop_remainder=True)
    # cache 256 batches of held-out data and loop over them for validation
    val_dataset = val_dataset.take(256).cache().repeat()
    val_x, _, val_y = val_dataset.make_one_shot_iterator().get_next()

    # 800 hidden units per layer; 3976 is presumably the number of output pdfs
    model = create_model(800, 3976)
    model.compile(
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
        optimizer=Adam(lr=learning_rate, amsgrad=True, clipvalue=1.)
    )
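
Neither snippet shows the loader helpers they rely on. A minimal sketch of what load_utt_to_spk and load_utt_to_pdfs plausibly do, assuming a Kaldi-style utt2spk file ("<utt-id> <spk-id>" per line) and a text pdf-alignment file ("<utt-id> <pdf> <pdf> ..." per line); the file formats and the spk_fn hook are assumptions, not the original implementation:

    import numpy as np

    def load_utt_to_spk(path, spk_fn=lambda s: s):
        # map each utterance to a dense 0-based speaker index, so that
        # max(utt_to_spk.values()) + 1 gives the speaker count (cf. Example #1)
        utt_to_spk, spk_index = {}, {}
        with open(path) as f:
            for line in f:
                utt, spk = line.split()
                key = spk_fn(spk)
                utt_to_spk[utt] = spk_index.setdefault(key, len(spk_index))
        return utt_to_spk

    def load_utt_to_pdfs(path):
        # map each utterance to its frame-level pdf targets
        utt_to_pdfs = {}
        with open(path) as f:
            for line in f:
                utt, *pdfs = line.split()
                utt_to_pdfs[utt] = np.array(pdfs, dtype=np.int32)
        return utt_to_pdfs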