# Vectorizing each element in each sequence sequences = np.zeros((len(word_sentences), max_len, len(word_to_ix))) for i, sentence in enumerate(word_sentences): for j, word in enumerate(sentence): sequences[i, j, word] = 1 return sequences sentences = pickle.load(open('sentences_intra', 'rb')) rootwords = pickle.load(open('rootwords_intra', 'rb')) features = pickle.load(open('features_intra', 'rb')) # we keep X_idx2word and y_idx2word the same X, X_vocab_len, X_word_to_ix, X_ix_to_word, y, y_vocab_len, y_word_to_ix, y_ix_to_word = load_data_for_seq2seq( sentences, rootwords) y1, y2, y3, y4, y5, y6, y7, y8 = load_data_for_features(sentences, features) # should be all equal for better results print(len(X)) print(X_vocab_len) print(len(X_word_to_ix)) print(len(X_ix_to_word)) print(len(y_word_to_ix)) print(len(y_ix_to_word)) X_max = max([len(word) for word in X]) y_max = max([len(word) for word in y]) X_max_len = max(X_max, y_max) y_max_len = max(X_max, y_max) print(X_max_len)
for i, sentence in enumerate(word_sentences): for j, word in enumerate(sentence): sequences[i, j, word] = 1 return sequences sentences = pickle.load(open('./pickle-dumps/sentences_intra', 'rb')) rootwords = pickle.load(open('./pickle-dumps/rootwords_intra', 'rb')) features = pickle.load(open('./pickle-dumps/features_intra', 'rb')) # we keep X_idx2word and y_idx2word the same # X_left & X_right = X shifted to one and two positions left and right for context2 X, X_vocab_len, X_word_to_ix, X_ix_to_word, y, y_vocab_len, y_word_to_ix, y_ix_to_word, X_left1, X_left2, X_left3, X_left4, X_left5, X_right1, X_right2, X_right3, X_right4, X_right5 = \ load_data_for_seq2seq(sentences, rootwords, test=False, context5=True) y1, y2, y3, y4, y5, y6, y7, y8 = load_data_for_features(features) y1, n1, y2, n2, y3, n3, y4, n4, y5, n5, y7, n7, y8, n8, enc, labels = process_features( y1, y2, y3, y4, y5, y7, y8) n = [n1, n2, n3, n4, n5, n7, n8] print(labels) # should be all equal for better results print(len(X)) print(X_vocab_len) print(len(X_word_to_ix)) print(len(X_ix_to_word)) #print(len(y_word_to_ix)) print(len(y_ix_to_word))