Пример #1
0
#target_dev = prepare_data.load_transcripts('../augmented_labels/data/normalized/transcripts/swedish/dev.txt')

# LibriSpeech ASR data
#features_train = prepare_data.load_features('../augmented_labels/data/normalized/features/libri/train')
#target_train = prepare_data.load_transcripts('../augmented_labels/data/normalized/transcripts/libri/train.txt')

#features_dev = prepare_data.load_features_combined('../augmented_labels/data/normalized/features/libri/dev.npy')
#target_dev = prepare_data.load_transcripts('../augmented_labels/data/normalized/transcripts/libri/dev.txt')

# LibriSpeech data: features paired with the *augmented* transcripts
# (the plain-transcript variant is the commented-out section above).
libri_train_features = (
    '../augmented_labels/data/normalized/features/libri/train')
libri_train_transcripts = (
    '../augmented_labels/data/normalized/augmented/libri/train.txt')
libri_dev_features = (
    '../augmented_labels/data/normalized/features/libri/dev.npy')
libri_dev_transcripts = (
    '../augmented_labels/data/normalized/augmented/libri/dev.txt')

# Training features go through load_features (the path has no file
# extension -- presumably a directory, confirm against prepare_data), while
# the dev features come from a single .npy file via load_features_combined.
features_train = prepare_data.load_features(libri_train_features)
target_train = prepare_data.load_transcripts(libri_train_transcripts)

features_dev = prepare_data.load_features_combined(libri_dev_features)
target_dev = prepare_data.load_transcripts(libri_dev_transcripts)

print('Done...')

print('Loading embeddings...')
# Alternative embedding model, kept for reference:
#embeddings = fasttext.load_model('weights/embeddings/cc.sv.300.bin')
embeddings_path = 'weights/embeddings/crawl-300d-2M-subword.bin'
embeddings = fasttext.load_model(embeddings_path)
print('Done...')

# generate index dictionaries
#char2idx, idx2char = prepare_data.encode_data(target_train)

# generate index dictionaries
Пример #2
0
        temp_transcripts = []
        temp_tags = []

    return transcripts, tags






if __name__ == '__main__':
    # Seed torch's RNG with a fixed value before anything else runs.
    torch.manual_seed(0)

    # Use the first CUDA device when one is available, otherwise the CPU.
    device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)

    # Parliament test set: combined feature file plus augmented transcripts.
    test_features_path = '../../TSD/augmented_labels/data/normalized/features/test.npy'
    test_transcripts_path = '../../TSD/augmented_labels/data/normalized/augmented/parliament/test.txt'
    features_test = prepare_data.load_features_combined(test_features_path)
    target_test = prepare_data.load_transcripts(test_transcripts_path)

    # Alternative setup for comparing against the conventional NER pipeline:
    #features_test = prepare_data.load_features_combined('../augmented_labels/data/normalized/features/test.npy')
    #target_test = prepare_data.load_transcripts('output/parliament/e2e_asr_combined.txt')
    #tags_test = prepare_data.load_tags('output/parliament/conventional_ner.txt')

    # Keep only the first 50 entries of both the features and the targets.
    first_fifty = slice(None, 50)
    features_test = features_test[first_fifty]
    target_test = target_test[first_fifty]

    print('Loading embeddings...')
    embeddings_path = 'weights/embeddings/cc.fi.300.bin'
    embeddings = fasttext.load_model(embeddings_path)
    print('Done...')
Пример #3
0
# Load the features and their target transcripts.
print('Loading data..')

# Parliament data with plain ASR transcripts (disabled alternative):
#features_train = prepare_data.load_features('data/normalized/features/train')
#target_train = prepare_data.load_transcripts('data/normalized/transcripts/train.txt')

#features_dev = prepare_data.load_features_combined('data/normalized/features/dev.npy')
#target_dev = prepare_data.load_transcripts('data/normalized/transcripts/dev.txt')

# Parliament data with augmented transcripts (the active configuration).
train_features_path = 'data/normalized/features/train'
train_transcripts_path = 'data/normalized/augmented/parliament/train.txt'
dev_features_path = 'data/normalized/features/dev.npy'
dev_transcripts_path = 'data/normalized/augmented/parliament/dev.txt'

features_train = prepare_data.load_features(train_features_path)
target_train = prepare_data.load_transcripts(train_transcripts_path)

# The dev features come from a single .npy file, hence the combined loader.
features_dev = prepare_data.load_features_combined(dev_features_path)
target_dev = prepare_data.load_transcripts(dev_transcripts_path)

print('Done...')

print('Loading embeddings...')
embeddings_path = 'weights/embeddings/cc.fi.300.bin'
embeddings = fasttext.load_model(embeddings_path)
print('Done...')

# Restore the pickled lookup tables stored with the weights. Judging by the
# names they map characters to indices and back -- confirm against the code
# that wrote them.
# NOTE(review): pickle.load can execute arbitrary code; only point these
# paths at files produced by this project.
char2idx_path = 'weights/char2idx_augmented.pkl'
idx2char_path = 'weights/idx2char_augmented.pkl'
with open(char2idx_path, 'rb') as f:
    char2idx = pickle.load(f)
with open(idx2char_path, 'rb') as f:
    idx2char = pickle.load(f)

# convert labels to indices