def predict_sentence(sentence, nn_model, w2v_model, index_to_token, diversity=0.5):
    """Generate a model response for *sentence*.

    The sentence is terminated with EOS_SYMBOL, tokenized, and decoded
    via _predict_sequence; the predicted tokens are joined with spaces.

    :param sentence: raw input text
    :param nn_model: trained sequence-prediction network
    :param w2v_model: word2vec model used for token embeddings
    :param index_to_token: mapping from vocabulary index to token string
    :param diversity: sampling temperature passed to _predict_sequence
    :return: predicted response as a single space-joined string
    """
    terminated_input = '{} {}'.format(sentence, EOS_SYMBOL)
    predicted_tokens = _predict_sequence(
        tokenize(terminated_input), nn_model, w2v_model, index_to_token, diversity)
    return ' '.join(predicted_tokens)
def get_tokenized_dialog_lines(iterable_dialog_lines):
    """Lazily tokenize dialog lines, terminating each with EOS_SYMBOL.

    :param iterable_dialog_lines: iterable of raw dialog-line strings
    :yield: list of tokens for one line, with EOS_SYMBOL appended
    """
    for dialog_line in iterable_dialog_lines:
        tokens = tokenize(dialog_line)
        tokens.append(EOS_SYMBOL)
        yield tokens
# Scatter raw EEG channels 4-13 onto fixed 2-D scalp-grid coordinates of the
# topographic tensor for sample i.
# NOTE(review): `i` and `data` here come from an enclosing loop that lies
# outside this chunk — confirm against the full file. The (row, col) grid
# positions are presumably the electrodes' scalp layout; verify against the
# recording montage.
eeg_topo_data[i, :, 4, 0] = data[:, 4]
eeg_topo_data[i, :, 6, 0] = data[:, 5]
eeg_topo_data[i, :, 8, 3] = data[:, 6]
eeg_topo_data[i, :, 8, 5] = data[:, 7]
eeg_topo_data[i, :, 6, 8] = data[:, 8]
eeg_topo_data[i, :, 4, 8] = data[:, 9]
eeg_topo_data[i, :, 3, 7] = data[:, 10]
eeg_topo_data[i, :, 2, 6] = data[:, 11]
eeg_topo_data[i, :, 2, 8] = data[:, 12]
eeg_topo_data[i, :, 1, 5] = data[:, 13]

# Convert each ground-truth entry into a numeric label plus a name via
# tokenize() (NOTE(review): this `tokenize` must return a (label, name)
# pair here — different from a text tokenizer; confirm which function is
# actually in scope). Names are collected only to print the class
# distribution below.
ground_truth_labels = torch.zeros(ground_truth.shape[0])
labels = []
for i, data in enumerate(ground_truth):
    ground_truth_labels[i], name = tokenize(data)
    labels.append(name)
# Print sorted (name, count) pairs so the class balance can be inspected.
print(sorted(Counter(list(labels)).items()))

# create dataset, split dataset to train_set and test_set
dataset = Dataset(eeg_topo_data, ecg_data, gsr_data, ground_truth_labels)
test_split = 0.3  # fraction of samples held out for testing
num_data = len(dataset)
num_test = int(num_data*test_split)
num_train = num_data - num_test
# random_split shuffles sample indices before partitioning (unseeded here,
# so the split differs between runs).
train_set, test_set = torch.utils.data.random_split(dataset, [num_train, num_test])
train_data = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True, num_workers=2)