english_language = Language('english') norwegian_language = Language('norwegian') # Filter by length and pad max_length = 30 padded_english_sentences, padded_norwegian_sentences = filter_pad_sentences( english_sentences, norwegian_sentences, max_length) # Create train and test set english_train_sentences, norwegian_train_sentences, english_test_sentences, norwegian_test_sentences = train_test_sentences( padded_english_sentences, padded_norwegian_sentences) # Count vocabulary in English and Norwegian sentences for sentence in english_train_sentences: english_language.count_words(sentence) for sentence in norwegian_train_sentences: norwegian_language.count_words(sentence) # Choose top N vocabulary in both languages N = 10000 english_language.top_n_words(N) norwegian_language.top_n_words(N) # Create dictionary to map words to idx english_language.index_words() norwegian_language.index_words() # Convert sentences into indices english_train_sentences_ids = convert_sentences_index(