import os

import numpy as np

from neon import NervanaObject
from neon.data import Text


def test_text(backend_default):
    """The character-level Text iterator should yield batches whose columns
    are contiguous slices of the source text."""
    text_data = ('Lorem ipsum dolor sit amet, consectetur adipisicing elit, '
                 'sed do eiusmod tempor incididunt ut labore et dolore magna '
                 'aliqua. Ut enim ad minim veniam, quis nostrud exercitation '
                 'ullamco laboris nisi ut aliquip ex ea commodo consequat. '
                 'Duis aute irure dolor in reprehenderit in voluptate velit '
                 'esse cillum dolore eu fugiat nulla pariatur. Excepteur sint '
                 'occaecat cupidatat non proident, sunt in culpa qui officia '
                 'deserunt mollit anim id est laborum.')
    data_path = 'tmp_test_text_data'
    with open(data_path, 'w') as f:
        f.write(text_data)

    NervanaObject.be.bsz = 4
    time_steps = 6
    valid_split = 0.2

    # load data and parse on character-level
    train_path, valid_path = Text.create_valid_file(data_path,
                                                    valid_split=valid_split)
    train_set = Text(time_steps, train_path)
    valid_set = Text(time_steps, valid_path, vocab=train_set.vocab)

    train_set.be = NervanaObject.be
    bsz = train_set.be.bsz

    for i, (X_batch, y_batch) in enumerate(train_set):
        if i > 2:
            break
        chars = [train_set.index_to_token[x]
                 for x in np.argmax(X_batch.get(), axis=0).tolist()]
        # the first sentence of this batch is contiguous with the first
        # sentence of the next batch
        for batch in range(bsz):
            sent = ''.join(chars[batch::bsz])
            start = i * time_steps + batch * time_steps * train_set.nbatches
            sent_ref = text_data[start:start + time_steps]
            assert sent == sent_ref

    valid_start = int(len(text_data) * (1 - valid_split))

    for i, (X_batch, y_batch) in enumerate(valid_set):
        if i > 2:
            break
        chars = [train_set.index_to_token[x]
                 for x in np.argmax(X_batch.get(), axis=0).tolist()]
        for batch in range(bsz):
            sent = ''.join(chars[batch::bsz])
            start = (i * time_steps + batch * time_steps * valid_set.nbatches +
                     valid_start)
            sent_ref = text_data[start:start + time_steps]
            assert sent == sent_ref

    os.remove(data_path)
    os.remove(train_path)
    os.remove(valid_path)
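# A minimal, self-contained numpy sketch (illustrative values only, not part of
# the test above) of the layout the test asserts: the character stream is cut
# into `bsz` contiguous segments, and column b of batch i continues segment b
# where the previous batch left off.
import numpy as np

stream = np.arange(48)                 # stand-in for a tokenized character stream
bsz, time_steps = 4, 6
nbatches = len(stream) // (bsz * time_steps)
# segment b covers stream[b * time_steps * nbatches:(b + 1) * time_steps * nbatches]
segments = stream[:bsz * time_steps * nbatches].reshape(bsz, nbatches * time_steps)
batch0 = segments[:, 0:time_steps]                 # first time_steps of every segment
batch1 = segments[:, time_steps:2 * time_steps]    # each row picks up where batch0 ended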
def vectorize_stories(self, data):
    """
    Convert (story, query, answer) word data into vectors.

    Args:
        data (tuple): Tuple of story, query, answer word data.

    Returns:
        tuple: Tuple of story, query, answer vectors.
    """
    s, q, a = [], [], []
    for story, query, answer in data:
        s.append(self.words_to_vector(story))
        q.append(self.words_to_vector(query))
        a.append(self.one_hot_vector(answer))

    s = Text.pad_sentences(s, self.story_maxlen)
    q = Text.pad_sentences(q, self.query_maxlen)
    a = np.array(a)

    return (s, q, a)
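# A minimal, self-contained numpy sketch (hypothetical helper and values, not
# part of the method above) of the pad-and-one-hot transformation that
# vectorize_stories performs on word-index sequences:
import numpy as np

def pad_to(sequences, max_len, pad_val=0):
    # pad each index sequence with pad_val so every row has length max_len
    out = np.full((len(sequences), max_len), pad_val, dtype=np.int32)
    for row, seq in enumerate(sequences):
        out[row, max_len - len(seq):] = seq
    return out

story_maxlen, vocab_size = 6, 10
stories = [[3, 1, 4, 1, 5], [2, 7]]              # word indices for two stories
answers = [5, 7]                                 # answer word index per story
s = pad_to(stories, story_maxlen)                # shape (2, story_maxlen)
a = np.eye(vocab_size, dtype=np.int32)[answers]  # one-hot answers, shape (2, vocab_size)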
# download penn treebank
train_path = load_ptb_train(path=args.data_dir)
valid_path = load_ptb_test(path=args.data_dir)


# define a custom function to parse the input into individual tokens, which for
# this data splits it into individual words. This can be passed into the Text
# object during dataset creation as seen below.
def tokenizer(s):
    return s.replace('\n', '<eos>').split()
# e.g. tokenizer('the cat sat \n') returns ['the', 'cat', 'sat', '<eos>']

# load data and parse on word-level
train_set = Text(time_steps, train_path, tokenizer=tokenizer,
                 onehot_input=False)
valid_set = Text(time_steps, valid_path, vocab=train_set.vocab,
                 tokenizer=tokenizer, onehot_input=False)

# weight initialization
init = Uniform(low=-0.1, high=0.1)

# model initialization
rlayer_params = {
    "output_size": hidden_size,
    "init": init,
    "activation": Tanh(),
print ex_answer

while True:
    # ask user for story and question
    story_lines = []
    line = raw_input("\nPlease enter a story:\n")
    while line != "":
        story_lines.append(line)
        line = raw_input()
    story = ("\n".join(story_lines)).strip()

    question = raw_input("Please enter a question:\n")

    # convert user input into a suitable network input
    vectorize = lambda words, max_len: \
        be.array(Text.pad_sentences([babi.words_to_vector(BABI.tokenize(words))],
                                    max_len))
    s = vectorize(story, babi.story_maxlen)
    q = vectorize(question, babi.query_maxlen)

    # get prediction probabilities with forward propagation
    probs = model_inference.fprop(x=(s, q), inference=True).get()

    # get top k answers
    top_k = -min(5, babi.vocab_size)
    max_indices = np.argpartition(probs, top_k, axis=0)[top_k:]
    max_probs = probs[max_indices]
    sorted_idx = max_indices[np.argsort(max_probs, axis=0)]

    print "\nAnswer:"
    for idx in reversed(sorted_idx):
        idx = int(idx)
# these hyperparameters are from the paper
args.batch_size = 50
time_steps = 150
hidden_size = 500
gradient_clip_value = None

# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# download penn treebank
train_path = load_ptb_train(path=args.data_dir)
valid_path = load_ptb_test(path=args.data_dir)

# load data and parse on character-level
train_set = Text(time_steps, train_path)
valid_set = Text(time_steps, valid_path, vocab=train_set.vocab)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
layers = [Recurrent(hidden_size, init, activation=Tanh()),
          Affine(len(train_set.vocab), init, bias=init, activation=Softmax())]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding)
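# A minimal sketch (an assumed continuation, not part of the excerpt above) of
# how these pieces are typically wired up for training with neon's Model.fit;
# args.epochs and args.callback_args are assumed to come from the argument
# parser that is not shown in this excerpt.
from neon.callbacks.callbacks import Callbacks

callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)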