rnd = random.random() for i,p in enumerate(dist): rnd -= p if rnd <= 0: break res.append(i) cw = i if cw == stop: break if nchars and len(res) > nchars: break return res if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('corpus', help='Path to the corpus file.') args = parser.parse_args() train = util.CharsCorpusReader(args.corpus, begin="<s>") vocab = util.Vocab.from_corpus(train) VOCAB_SIZE = vocab.size() model = Model() trainer = SimpleSGDTrainer(model) #lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=SimpleRNNBuilder) lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=LSTMBuilder) train = list(train) chars = loss = 0.0 for ITER in range(100): random.shuffle(train)
r_t = bias + (R * y_t) ydist = softmax(r_t) dist = ydist.vec_value() rnd = random.random() for i, p in enumerate(dist): rnd -= p if rnd <= 0: break res.append(i) cw = i if cw == stop: break if nchars and len(res) > nchars: break return res if __name__ == '__main__': train = util.CharsCorpusReader(sys.argv[1], begin="<s>") vocab = util.Vocab.from_corpus(train) VOCAB_SIZE = vocab.size() model = Model() sgd = SimpleSGDTrainer(model) #lm = RNNLanguageModel(model, builder=LSTMBuilder) lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE, builder=SimpleRNNBuilder)
if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('train', help='Path to the corpus file.') parser.add_argument('dev', help='Path to the validation corpus file.') parser.add_argument('test', help='Path to the test corpus file.') parser.add_argument( '--print_probs', action="store_true", help= 'whether to print the probabilities per word over the validation set') parser.add_argument('--perform_train', action="store_true", help='whether to perform training') args, unknown = parser.parse_known_args() train = util.CharsCorpusReader(args.train, begin="<s>") dev = util.CharsCorpusReader(args.dev, begin="<s>") test = util.CharsCorpusReader(args.test, begin="<s>") vocab = util.Vocab.from_corpus(train) VOCAB_SIZE = vocab.size() model = dy.Model() trainer = dy.SimpleSGDTrainer(model, learning_rate=1.0) lm = RNNLanguageModel(model, LAYERS, INPUT_DIM, HIDDEN_DIM, VOCAB_SIZE,