# At training time however, we will use the "length" array to shrink that matrix
# following the largest sentence within a batch. In practice, this means that
# batches are padded with 1 or 2 zeros, or aren't even padded at all.
kalchbrenner_path = "./data/binarySentiment/"
train_x_indexes, train_y, train_lengths = dataUtils.read_and_sort_matlab_data(
    kalchbrenner_path + "train.txt", kalchbrenner_path + "train_lbl.txt")
dev_x_indexes, dev_y, dev_lengths = dataUtils.read_and_sort_matlab_data(
    kalchbrenner_path + "valid.txt", kalchbrenner_path + "valid_lbl.txt")
test_x_indexes, test_y, test_lengths = dataUtils.read_and_sort_matlab_data(
    kalchbrenner_path + "test.txt", kalchbrenner_path + "test_lbl.txt")

# Train data.
# Floor division so the batch count is an int under Python 3 as well
# (plain "/" would yield a float and break range()/indexing downstream).
n_train_batches = len(train_lengths) // hyperparas['batch_size']

# Dev data.
# To be able to do a correct evaluation, we pad a number of rows to get a
# multiple of the batch size.
dev_x_indexes_extended = dataUtils.pad_to_batch_size(dev_x_indexes, hyperparas['batch_size'])
dev_y_extended = dataUtils.pad_to_batch_size(dev_y, hyperparas['batch_size'])
n_dev_batches = dev_x_indexes_extended.shape[0] // hyperparas['batch_size']
n_dev_samples = len(dev_y)
# (sic: "lenghts" is the helper's actual name in dataUtils)
dataUtils.extend_lenghts(dev_lengths, hyperparas['batch_size'])

# Test data, padded the same way as dev.
test_x_indexes_extended = dataUtils.pad_to_batch_size(test_x_indexes, hyperparas['batch_size'])
test_y_extended = dataUtils.pad_to_batch_size(test_y, hyperparas['batch_size'])
n_test_batches = test_x_indexes_extended.shape[0] // hyperparas['batch_size']
n_test_samples = len(test_y)
dataUtils.extend_lenghts(test_lengths, hyperparas['batch_size'])

######################
# BUILD ACTUAL MODEL #
# Load data (format borrowed from the Kalchbrenner matlab files).
# We order the input according to length and pad all sentences until the maximum
# length. At training time however, we will use the "length" arrays to shrink
# those matrices following the largest sentence within a batch. In practice,
# this means that batches are padded with 1 or 2 zeros, or aren't even padded at all.
data_path = "../multinli_0.9/DCNN_format/"
train_y, train_sents1, train_sents2, train_lens1, train_lens2 = dataUtils.read_and_sort(data_path + 'train.txt')
dev_y, dev_sents1, dev_sents2, dev_lens1, dev_lens2 = dataUtils.read_and_sort(data_path + 'dev.txt')
test_y, test_sents1, test_sents2, test_lens1, test_lens2 = dataUtils.read_and_sort(data_path + 'test.txt')

# Train data.
# Floor division so the batch count is an int under Python 3 as well
# (plain "/" would yield a float and break range()/indexing downstream).
n_train_batches = len(train_lens1) // hyperparas['batch_size']

# Dev data.
# To be able to do a correct evaluation, we pad a number of rows to get a
# multiple of the batch size. Both sentences of each pair are padded.
dev_sents1_extended = dataUtils.pad_to_batch_size(dev_sents1, hyperparas['batch_size'])
dev_sents2_extended = dataUtils.pad_to_batch_size(dev_sents2, hyperparas['batch_size'])
dev_y_extended = dataUtils.pad_to_batch_size(dev_y, hyperparas['batch_size'])
n_dev_batches = dev_sents1_extended.shape[0] // hyperparas['batch_size']
n_dev_samples = len(dev_y)
# (sic: "lenghts" is the helper's actual name in dataUtils)
dataUtils.extend_lenghts(dev_lens1, hyperparas['batch_size'])
dataUtils.extend_lenghts(dev_lens2, hyperparas['batch_size'])

# Test data, padded the same way as dev.
test_sents1_extended = dataUtils.pad_to_batch_size(test_sents1, hyperparas['batch_size'])
test_sents2_extended = dataUtils.pad_to_batch_size(test_sents2, hyperparas['batch_size'])
test_y_extended = dataUtils.pad_to_batch_size(test_y, hyperparas['batch_size'])
n_test_batches = test_sents1_extended.shape[0] // hyperparas['batch_size']
n_test_samples = len(test_y)
dataUtils.extend_lenghts(test_lens1, hyperparas['batch_size'])
dataUtils.extend_lenghts(test_lens2, hyperparas['batch_size'])
# Load data, taken from Kalchbrenner matlab files.
# We order the input according to length and pad all sentences until the maximum
# length. At training time however, we will use the "length" array to shrink
# that matrix following the largest sentence within a batch. In practice, this
# means that batches are padded with 1 or 2 zeros, or aren't even padded at all.
kalchbrenner_path = "./data/binarySentiment/"
train_x_indexes, train_y, train_lengths = dataUtils.read_and_sort_matlab_data(
    kalchbrenner_path + "train.txt", kalchbrenner_path + "train_lbl.txt")
dev_x_indexes, dev_y, dev_lengths = dataUtils.read_and_sort_matlab_data(
    kalchbrenner_path + "valid.txt", kalchbrenner_path + "valid_lbl.txt")
test_x_indexes, test_y, test_lengths = dataUtils.read_and_sort_matlab_data(
    kalchbrenner_path + "test.txt", kalchbrenner_path + "test_lbl.txt")

# Train data.
# Floor division so the batch count is an int under Python 3 as well
# (plain "/" would yield a float and break range()/indexing downstream).
n_train_batches = len(train_lengths) // hyperparas['batch_size']

# Dev data.
# To be able to do a correct evaluation, we pad a number of rows to get a
# multiple of the batch size.
dev_x_indexes_extended = dataUtils.pad_to_batch_size(dev_x_indexes, hyperparas['batch_size'])
dev_y_extended = dataUtils.pad_to_batch_size(dev_y, hyperparas['batch_size'])
n_dev_batches = dev_x_indexes_extended.shape[0] // hyperparas['batch_size']
n_dev_samples = len(dev_y)
# (sic: "lenghts" is the helper's actual name in dataUtils)
dataUtils.extend_lenghts(dev_lengths, hyperparas['batch_size'])

# Test data, padded the same way as dev.
test_x_indexes_extended = dataUtils.pad_to_batch_size(test_x_indexes, hyperparas['batch_size'])
test_y_extended = dataUtils.pad_to_batch_size(test_y, hyperparas['batch_size'])
n_test_batches = test_x_indexes_extended.shape[0] // hyperparas['batch_size']
n_test_samples = len(test_y)
dataUtils.extend_lenghts(test_lengths, hyperparas['batch_size'])

######################
# BUILD ACTUAL MODEL #
######################