def prepare(self, loc_count, user_count, hidden_size, gru_factory, device):
    self.hidden_size = hidden_size
    if self.use_user_embedding:
        self.model = RNN_user(loc_count, user_count, hidden_size, gru_factory).to(device)
    else:
        self.model = RNN(loc_count, hidden_size, gru_factory).to(device)
def load_checkpoint(filename, dir=GOOGLE_COLAB_CHECKPOINT_DIR):
    checkpoint = torch.load(os.path.join(dir, filename))
    model = RNN(hidden_size=checkpoint['hidden_size'],
                output_size=checkpoint['output_size'],
                n_layers=checkpoint['n_layers'],
                batch_size=checkpoint['batch_size'],
                bidirectional=checkpoint['bidirectional'])
    model.load_state_dict(checkpoint['state_dict'])
    model.num_epochs_trained = checkpoint['num_epochs_trained']
    return model
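# Illustrative counterpart to load_checkpoint above (not part of the original
# snippet): a minimal sketch of saving a checkpoint with exactly the keys the
# loader expects. It assumes the RNN keeps its constructor arguments as
# attributes (hidden_size, output_size, ...); otherwise pass them in explicitly.
import os
import torch

def save_checkpoint(model, filename, dir=GOOGLE_COLAB_CHECKPOINT_DIR):
    checkpoint = {
        'hidden_size': model.hidden_size,
        'output_size': model.output_size,
        'n_layers': model.n_layers,
        'batch_size': model.batch_size,
        'bidirectional': model.bidirectional,
        'state_dict': model.state_dict(),
        'num_epochs_trained': model.num_epochs_trained,
    }
    torch.save(checkpoint, os.path.join(dir, filename))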
def main():
    with open('data/wsj00-18.pos') as f:
        training_data = f.readlines()
    with open('data/wsj19-21.pos') as f:
        test_data = f.readlines()

    model = word2vec.KeyedVectors.load_word2vec_format(
        './data/GoogleNews-vectors-negative300.bin', binary=True)
    # load_word2vec_format already returns KeyedVectors, so query it directly
    print(model.most_similar(positive=['woman', 'king'], negative=['man']))
    # print(model['hgoehgoehgoehg'])
    # print(len(model['hogehgoehgoe']))

    labels = ('NNP', ',', 'CD', 'NNS', 'JJ', 'MD', 'VB', 'DT', 'NN', 'IN', '.',
              'VBZ', 'VBG', 'CC', 'VBD', 'VBN', 'RB', 'TO', 'PRP', 'RBR', 'WDT',
              'VBP', 'RP', 'PRP$', 'JJS', 'POS', '``', 'EX', "''", 'WP', ':',
              'JJR', 'WRB', '$', 'NNPS', 'WP$', '-LRB-', '-RRB-', 'PDT', 'RBS',
              'FW', 'UH', 'SYM', 'LS', '#')

    rnn = RNN(300, 1000, labels)
    training_vector_data = [line for line in training_data]
    test_vector_data = [line for line in test_data]
    manager = NetworkEvaluator(rnn, training_vector_data, test_vector_data)
def run(checkpoint=None, dir=CHECKPOINT_DIR):
    gc.collect()
    batch_size = 1
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # torch.autograd.set_detect_anomaly(True)
    clean_train_loader = DataLoader(clean_train_data, batch_size=batch_size,
                                    shuffle=True, num_workers=2, pin_memory=True)
    clean_test_loader = DataLoader(clean_test_data, batch_size=batch_size,
                                   shuffle=False, num_workers=2, pin_memory=True)
    if checkpoint:
        model = load_checkpoint(checkpoint, dir=dir).to(device)
    else:
        model = RNN(hidden_size=80, output_size=5, n_layers=2,
                    batch_size=batch_size, bidirectional=False).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss = torch.nn.CrossEntropyLoss()
    losses = train(clean_train_loader, clean_test_loader, 500, model, optimizer,
                   loss, device, checkpoint_dir=CHECKPOINT_DIR)
    return model, losses
class BprTrainer(Trainer):

    def greeter(self):
        if not self.use_user_embedding:
            return 'Use BPR training.'
        return 'Use BPR training with user embeddings.'

    def prepare(self, loc_count, user_count, hidden_size, gru_factory, device):
        self.hidden_size = hidden_size
        if self.use_user_embedding:
            self.model = RNN_user(loc_count, user_count, hidden_size, gru_factory).to(device)
        else:
            self.model = RNN(loc_count, hidden_size, gru_factory).to(device)

    def evaluate(self, x, t, s, y_t, y_s, h, active_users):
        seq_length = x.shape[0]
        user_length = x.shape[1]
        out, h = self.model(x, h, active_users)
        out_t = out.transpose(0, 1)
        response = []
        Q = self.model.encoder.weight
        for j in range(user_length):
            out_j = out_t[j].transpose(0, 1)
            o = torch.matmul(Q, out_j).cpu().detach()
            o = o.transpose(0, 1)
            o = o.contiguous().view(seq_length, -1)
            response.append(o)
        return response, h

    def loss(self, x, t, s, y, y_t, y_s, h, active_users):
        out, h = self.model(x, h, active_users)
        y_emb = self.model.encoder(y)
        # reshape
        out = out.view(-1, self.hidden_size)
        out_t = out.transpose(0, 1)
        y_emb = y_emb.contiguous().view(-1, self.hidden_size)
        Q = self.model.encoder.weight
        neg_o = torch.matmul(Q, out_t)
        pos_o = torch.matmul(y_emb, out_t).diag()
        l = torch.log(1 + torch.exp(-(pos_o - neg_o)))
        l = torch.mean(l)
        return l, h
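# Illustrative sketch (not part of the original trainer): the BPR-style term in
# BprTrainer.loss above is log(1 + exp(-(pos - neg))), i.e. softplus of the
# negated score margin between a positive item and a competing item. The toy
# tensors below are made-up values only to confirm the softplus identity.
import torch
import torch.nn.functional as F

pos = torch.tensor([2.0, 0.5, -1.0])   # toy scores of observed (positive) items
neg = torch.tensor([0.0, 1.0, 0.5])    # toy scores of competing (negative) items
manual = torch.log(1 + torch.exp(-(pos - neg))).mean()
stable = F.softplus(-(pos - neg)).mean()   # numerically safer formulation
assert torch.allclose(manual, stable)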
class RESTService:

    def __init__(self, host='0.0.0.0', port=8080):
        self._host = host
        self._port = port
        self._model = RNN()
        self._app = Bottle()
        self._route()

    def _route(self):
        self._app.route('/predict', method="POST", callback=self._predict)

    def _predict(self):
        data = request.json
        category = self._model.predict(data)
        response.content_type = 'text/plain'
        return str(category)

    def start(self):
        self._app.run(host=self._host, port=self._port)
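# Hypothetical client for the RESTService above (not from the original code):
# it assumes the service is running locally on the default port and that the
# model accepts whatever JSON payload is posted to /predict; the payload format
# below is made up for illustration.
import requests

service_url = 'http://localhost:8080/predict'   # placeholder URL
payload = {'sequence': [0.1, 0.2, 0.3]}          # made-up input format
resp = requests.post(service_url, json=payload)
print(resp.text)   # the predicted category, returned as plain text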
def setUpClass(cls):
    cls.W_in = np.eye(2)
    cls.W_rec = np.eye(2)
    cls.W_out = np.eye(2)
    cls.W_FB = -np.ones((2, 2)) + np.eye(2)
    cls.b_rec = np.zeros(2)
    cls.b_out = np.zeros(2)
    cls.rnn = RNN(cls.W_in, cls.W_rec, cls.W_out,
                  cls.b_rec, cls.b_out,
                  activation=identity, alpha=1,
                  output=softmax, loss=softmax_cross_entropy)
    cls.rnn.a = np.ones(2)
    cls.rnn.error = np.ones(2) * 0.5
def test_mimic_task(self):
    """Verifies that the proper RNN output is returned as label in a simple
    case where the RNN simply counts the number of time steps."""

    from network import RNN
    from functions import identity, mean_squared_error

    n_in = 2
    n_h = 2
    n_out = 2
    W_in_target = np.eye(n_in)
    W_rec_target = np.eye(n_h)
    W_out_target = np.eye(n_out)
    b_rec_target = np.zeros(n_h)
    b_out_target = np.zeros(n_out)
    alpha = 1

    rnn_target = RNN(W_in_target, W_rec_target, W_out_target,
                     b_rec_target, b_out_target,
                     activation=identity, alpha=alpha,
                     output=identity, loss=mean_squared_error)

    task = Mimic_RNN(rnn_target, p_input=1, tau_task=1)
    data = task.gen_data(100, 0)

    y = np.arange(1, 101)
    y_correct = np.array([y, y]).T

    self.assertTrue(np.isclose(data['train']['Y'], y_correct).all())
def train_rnn(file, batch_size, layers, learning_rate, dropout, num_steps,
              cell_size, epochs, cell, test_seed, delim, save):
    """ Train neural network """
    model_name = "cell-{}-size-{}-batch-{}-steps-{}-layers-{}-lr-{}-dropout-{}".format(
        cell, cell_size, batch_size, num_steps, layers, learning_rate, dropout)
    ds = Dataset(file, batch_size=batch_size, num_steps=num_steps, with_delim=delim)
    n = RNN(data=ds, cell=cell, num_layers=layers, dropout=dropout,
            learning_rate=learning_rate, cell_size=cell_size, num_epochs=epochs)
    n.train(save=save, model_name=model_name, test_output=True,
            test_seed=test_seed, with_delim=delim)
    if save:
        n.save(model_name)
import os

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader

from dataset import dataset, dataset2
from network import RNN

model_dir = '/home/lixiaoyu/project/airQuality/Analysis-of-Air-Quality-and-Outpatient-Quantity/ckpt/'
TIME_STEP = 120
INPUT_SIZE = 7
HIDDEN_SIZE = 32
LR = 0.01
EPOCH = 1000

rnn = RNN(INPUT_SIZE=INPUT_SIZE, HIDDEN_SIZE=HIDDEN_SIZE)
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)
loss_func = nn.MSELoss()
LoadModel = False


def train(train_loader, num_e):
    torch.manual_seed(1)
    if LoadModel:
        checkpoint = torch.load(model_dir + '{}.ckpt'.format(num_e))
        rnn.load_state_dict(checkpoint['state_dict'])
        print('Loading model~~~~~~~~~~', num_e)
    for e in range(EPOCH):
        print('epoch>>>>>>> ', e)
def test(word2vec, dataset, parameters, loadpath):
    print "1"
    device_string = "/gpu:{}".format(parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    with tf.device(device_string):
        print "2"
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
        sess = tf.Session(config=config_proto)

        premises_ph = tf.placeholder(tf.float32,
                                     shape=[parameters["sequence_length"], None, parameters["embedding_dim"]],
                                     name="premises")
        hypothesis_ph = tf.placeholder(tf.float32,
                                       shape=[parameters["sequence_length"], None, parameters["embedding_dim"]],
                                       name="hypothesis")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")

        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(filter_height=1,
                                             filter_width=parameters["embedding_dim"],
                                             in_channels=1,
                                             out_channels=parameters["num_units"],
                                             name="projecter")

        with tf.variable_scope(name_or_scope="premise"):
            premise = RNN(cell=LSTMCell, num_units=parameters["num_units"],
                          embedding_dim=parameters["embedding_dim"],
                          projecter=projecter, keep_prob=keep_prob_ph)
            premise.process(sequence=premises_ph)

        with tf.variable_scope(name_or_scope="hypothesis"):
            hypothesis = RNN(cell=AttentionLSTMCell, num_units=parameters["num_units"],
                             embedding_dim=parameters["embedding_dim"],
                             hiddens=premise.hiddens, states=premise.states,
                             projecter=projecter, keep_prob=keep_prob_ph)
            hypothesis.process(sequence=hypothesis_ph)

        loss, loss_summary, accuracy, accuracy_summary = hypothesis.loss(targets=targets_ph)

        loader = tf.train.Saver()
        loader.restore(sess, loadpath)

        batcher = Batcher(word2vec=word2vec, settings=parameters)
        test_batches = batcher.batch_generator(dataset=dataset["test"],
                                               num_epochs=1,
                                               batch_size=parameters["batch_size"]["test"],
                                               sequence_length=parameters["sequence_length"])
        print "2.5"
        for test_step, (test_batch, _) in enumerate(test_batches):
            print "3"
            feed_dict = {
                premises_ph: np.transpose(test_batch["premises"], (1, 0, 2)),
                hypothesis_ph: np.transpose(test_batch["hypothesis"], (1, 0, 2)),
                targets_ph: test_batch["targets"],
                keep_prob_ph: 1.,
            }
            test_loss, test_accuracy = sess.run([loss, accuracy], feed_dict=feed_dict)
            print "\nTEST | loss={0:.2f}, accuracy={1:.2f}% ".format(test_loss, 100. * test_accuracy)
            print ""
training_loader = DataLoader(training_set, args.batch_size, shuffle=True)

validation_data, validation_labels = joblib.load('%s/validation-%d.data' % (args.datapath, args.n))
validation_data = th.from_numpy(validation_data)
validation_labels = onehot_sequence(th.from_numpy(validation_labels), 10, cuda)
validation_set = TensorDataset(validation_data, validation_labels)
validation_loader = DataLoader(validation_set, args.batch_size)

test_data, test_labels = joblib.load('%s/test-%d.data' % (args.datapath, args.n))
test_data = th.from_numpy(test_data)
test_labels = onehot_sequence(th.from_numpy(test_labels), 10, cuda)
test_set = TensorDataset(test_data, test_labels)
test_loader = DataLoader(test_set, args.batch_size)

cnn_path = args.pretrained_cnn_path if args.pretrained_cnn else None
model = RNN(args.n_units, 10, cnn_path, cuda)
if args.gpu > -1:
    model.cuda()

if args.criterion == 'regression_loss':
    from criterions import regression_loss
    criterion = regression_loss(args.entropy_scale)
else:
    criterion = getattr(__import__('criterions'), args.criterion)()
if args.gpu > -1:
    criterion.cuda()

optimizer = Adam(model.parameters(), lr=1e-3)

vis = visdom.Visdom()
tb_path = args.tensorboard_path
if args.tensorboard_log:
    tb_path += '/%s' % args.tensorboard_log
TensorboardVisualizer.configure(tb_path)
def train(word2vec, dataset, parameters):
    modeldir = os.path.join(parameters["runs_dir"], parameters["model_name"])
    if not os.path.exists(modeldir):
        os.mkdir(modeldir)
    logdir = os.path.join(modeldir, "log")
    if not os.path.exists(logdir):
        os.mkdir(logdir)
    logdir_train = os.path.join(logdir, "train")
    if not os.path.exists(logdir_train):
        os.mkdir(logdir_train)
    logdir_test = os.path.join(logdir, "test")
    if not os.path.exists(logdir_test):
        os.mkdir(logdir_test)
    logdir_dev = os.path.join(logdir, "dev")
    if not os.path.exists(logdir_dev):
        os.mkdir(logdir_dev)
    savepath = os.path.join(modeldir, "save")

    device_string = "/gpu:{}".format(parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    with tf.device(device_string):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
        sess = tf.Session(config=config_proto)

        premises_ph = tf.placeholder(tf.float32,
                                     shape=[parameters["sequence_length"], None, parameters["embedding_dim"]],
                                     name="premises")
        hypothesis_ph = tf.placeholder(tf.float32,
                                       shape=[parameters["sequence_length"], None, parameters["embedding_dim"]],
                                       name="hypothesis")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")

        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(filter_height=1,
                                             filter_width=parameters["embedding_dim"],
                                             in_channels=1,
                                             out_channels=parameters["num_units"],
                                             name="projecter")

        optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"],
                                           name="ADAM", beta1=0.9, beta2=0.999)

        with tf.variable_scope(name_or_scope="premise"):
            premise = RNN(cell=LSTMCell, num_units=parameters["num_units"],
                          embedding_dim=parameters["embedding_dim"],
                          projecter=projecter, keep_prob=keep_prob_ph)
            premise.process(sequence=premises_ph)

        with tf.variable_scope(name_or_scope="hypothesis"):
            hypothesis = RNN(cell=AttentionLSTMCell, num_units=parameters["num_units"],
                             embedding_dim=parameters["embedding_dim"],
                             hiddens=premise.hiddens, states=premise.states,
                             projecter=projecter, keep_prob=keep_prob_ph)
            hypothesis.process(sequence=hypothesis_ph)

        loss, loss_summary, accuracy, accuracy_summary = hypothesis.loss(targets=targets_ph)
        weight_decay = tf.reduce_sum([tf.reduce_sum(parameter)
                                      for parameter in premise.parameters + hypothesis.parameters])
        global_loss = loss + parameters["weight_decay"] * weight_decay

        train_summary_op = tf.merge_summary([loss_summary, accuracy_summary])
        train_summary_writer = tf.train.SummaryWriter(logdir_train, sess.graph)
        test_summary_op = tf.merge_summary([loss_summary, accuracy_summary])
        test_summary_writer = tf.train.SummaryWriter(logdir_test)

        saver = tf.train.Saver(max_to_keep=10)
        summary_writer = tf.train.SummaryWriter(logdir)
        tf.train.write_graph(sess.graph_def, modeldir, "graph.pb", as_text=False)
        loader = tf.train.Saver(tf.all_variables())

        optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"],
                                           name="ADAM", beta1=0.9, beta2=0.999)
        train_op = optimizer.minimize(global_loss)
        sess.run(tf.initialize_all_variables())

        batcher = Batcher(word2vec=word2vec)
        train_batches = batcher.batch_generator(dataset=dataset["train"],
                                                num_epochs=parameters["num_epochs"],
                                                batch_size=parameters["batch_size"]["train"],
                                                sequence_length=parameters["sequence_length"])
        num_step_by_epoch = int(math.ceil(len(dataset["train"]["targets"]) / parameters["batch_size"]["train"]))

        for train_step, (train_batch, epoch) in enumerate(train_batches):
            feed_dict = {
                premises_ph: np.transpose(train_batch["premises"], (1, 0, 2)),
                hypothesis_ph: np.transpose(train_batch["hypothesis"], (1, 0, 2)),
                targets_ph: train_batch["targets"],
                keep_prob_ph: parameters["keep_prob"],
            }
            _, summary_str, train_loss, train_accuracy = sess.run(
                [train_op, train_summary_op, loss, accuracy], feed_dict=feed_dict)
            train_summary_writer.add_summary(summary_str, train_step)
            if train_step % 100 == 0:
                sys.stdout.write("\rTRAIN | epoch={0}/{1}, step={2}/{3} | loss={4:.2f}, accuracy={5:.2f}% ".format(
                    epoch + 1, parameters["num_epochs"], train_step % num_step_by_epoch,
                    num_step_by_epoch, train_loss, 100. * train_accuracy))
                sys.stdout.flush()
            if train_step % 5000 == 0:
                test_batches = batcher.batch_generator(dataset=dataset["test"],
                                                       num_epochs=1,
                                                       batch_size=parameters["batch_size"]["test"],
                                                       sequence_length=parameters["sequence_length"])
                for test_step, (test_batch, _) in enumerate(test_batches):
                    feed_dict = {
                        premises_ph: np.transpose(test_batch["premises"], (1, 0, 2)),
                        hypothesis_ph: np.transpose(test_batch["hypothesis"], (1, 0, 2)),
                        targets_ph: test_batch["targets"],
                        keep_prob_ph: 1.,
                    }
                    summary_str, test_loss, test_accuracy = sess.run(
                        [test_summary_op, loss, accuracy], feed_dict=feed_dict)
                    print "\nTEST | loss={0:.2f}, accuracy={1:.2f}% ".format(test_loss, 100. * test_accuracy)
                    print ""
                    test_summary_writer.add_summary(summary_str, train_step)
                    break
            if train_step % 5000 == 0:
                saver.save(sess, save_path=savepath, global_step=train_step)
        print ""
def train(word2vec, dataset, parameters):
    modeldir = os.path.join(parameters["runs_dir"], parameters["model_name"])
    if not os.path.exists(modeldir):
        os.mkdir(modeldir)
    logdir = os.path.join(modeldir, "log")
    if not os.path.exists(logdir):
        os.mkdir(logdir)
    logdir_train = os.path.join(logdir, "train")
    if not os.path.exists(logdir_train):
        os.mkdir(logdir_train)
    logdir_test = os.path.join(logdir, "test")
    if not os.path.exists(logdir_test):
        os.mkdir(logdir_test)
    logdir_dev = os.path.join(logdir, "dev")
    if not os.path.exists(logdir_dev):
        os.mkdir(logdir_dev)
    savepath = os.path.join(modeldir, "save")

    device_string = "/gpu:{}".format(parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    with tf.device(device_string):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
        sess = tf.Session(config=config_proto)

        premises_ph = tf.placeholder(tf.float32,
                                     shape=[parameters["sequence_length"], None, parameters["embedding_dim"]],
                                     name="premises")
        hypothesis_ph = tf.placeholder(tf.float32,
                                       shape=[parameters["sequence_length"], None, parameters["embedding_dim"]],
                                       name="hypothesis")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")

        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(filter_height=1,
                                             filter_width=parameters["embedding_dim"],
                                             in_channels=1,
                                             out_channels=parameters["num_units"],
                                             name="projecter")
        # optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"], name="ADAM", beta1=0.9, beta2=0.999)

        with tf.variable_scope(name_or_scope="premise"):
            premise = RNN(cell=LSTMCell, num_units=parameters["num_units"],
                          embedding_dim=parameters["embedding_dim"],
                          projecter=projecter, keep_prob=keep_prob_ph)
            premise.process(sequence=premises_ph)

        with tf.variable_scope(name_or_scope="hypothesis"):
            hypothesis = RNN(cell=AttentionLSTMCell, num_units=parameters["num_units"],
                             embedding_dim=parameters["embedding_dim"],
                             hiddens=premise.hiddens, states=premise.states,
                             projecter=projecter, keep_prob=keep_prob_ph)
            hypothesis.process(sequence=hypothesis_ph)

        loss, loss_summary, accuracy, accuracy_summary = hypothesis.loss(targets=targets_ph)
        weight_decay = tf.reduce_sum([tf.reduce_sum(parameter)
                                      for parameter in premise.parameters + hypothesis.parameters])
        global_loss = loss + parameters["weight_decay"] * weight_decay

        train_summary_op = tf.merge_summary([loss_summary, accuracy_summary])
        # train_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        train_summary_writer = tf.train.SummaryWriter(logdir_train, sess.graph)
        # train_summary_writer = tf.summary.FileWriter(logdir_train, sess.graph)
        # test_summary_op = tf.merge_summary([loss_summary, accuracy_summary])
        dev_summary_op = tf.merge_summary([loss_summary, accuracy_summary])
        # test_summary_writer = tf.train.SummaryWriter(logdir_test)
        dev_summary_writer = tf.train.SummaryWriter(logdir_dev)

        saver = tf.train.Saver(max_to_keep=10)
        # summary_writer = tf.train.SummaryWriter(logdir)
        tf.train.write_graph(sess.graph_def, modeldir, "graph.pb", as_text=False)

        optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"],
                                           name="ADAM", beta1=0.9, beta2=0.999)
        train_op = optimizer.minimize(global_loss)
        sess.run(tf.initialize_all_variables())
        # sess.run(tf.global_variables_initializer())

        batcher = Batcher(word2vec=word2vec, settings=parameters)
        # train_split = "train"
        # train_batches = batcher.batch_generator(dataset=dataset[train_split], num_epochs=parameters["num_epochs"],
        #                                         batch_size=parameters["batch_size"]["train"],
        #                                         sequence_length=parameters["sequence_length"])
        # print("train data size: %d" % len(dataset["train"]["targets"]))
        # num_step_by_epoch = int(math.ceil(len(dataset[train_split]["targets"]) / parameters["batch_size"]["train"]))
        # best_dev_accuracy = 0

        print("train data size: %d" % len(dataset["train"]["targets"]))
        best_dev_accuracy = 0.0
        total_loss = 0.0
        timestamp = time.time()
        for epoch in range(parameters["num_epochs"]):
            print("epoch %d" % epoch)
            train_batches = batcher.batch_generator(dataset=dataset["train"],
                                                    num_epochs=1,
                                                    batch_size=parameters["batch_size"]["train"],
                                                    sequence_length=parameters["sequence_length"])
            steps = len(dataset["train"]["targets"]) / parameters["batch_size"]["train"]
            # progress bar http://stackoverflow.com/a/3002114
            bar = progressbar.ProgressBar(maxval=steps / 10 + 1,
                                          widgets=[progressbar.Bar('=', '[', ']'), ' ',
                                                   progressbar.Percentage()])
            bar.start()
            for step, (train_batch, train_epoch) in enumerate(train_batches):
                feed_dict = {
                    premises_ph: np.transpose(train_batch["premises"], (1, 0, 2)),
                    hypothesis_ph: np.transpose(train_batch["hypothesis"], (1, 0, 2)),
                    targets_ph: train_batch["targets"],
                    keep_prob_ph: parameters["keep_prob"],
                }
                _, summary_str, train_loss, train_accuracy = sess.run(
                    [train_op, train_summary_op, loss, accuracy], feed_dict=feed_dict)
                total_loss += train_loss
                train_summary_writer.add_summary(summary_str, step)
                if step % 100 == 0:
                    # eval 1 random dev batch
                    dev_batches = batcher.batch_generator(dataset=dataset["dev"],
                                                          num_epochs=1,
                                                          batch_size=parameters["batch_size"]["dev"],
                                                          sequence_length=parameters["sequence_length"])
                    for dev_step, (dev_batch, _) in enumerate(dev_batches):
                        feed_dict = {
                            premises_ph: np.transpose(dev_batch["premises"], (1, 0, 2)),
                            hypothesis_ph: np.transpose(dev_batch["hypothesis"], (1, 0, 2)),
                            targets_ph: dev_batch["targets"],
                            keep_prob_ph: 1.,
                        }
                        summary_str, dev_loss, dev_accuracy = sess.run(
                            [dev_summary_op, loss, accuracy], feed_dict=feed_dict)
                        dev_summary_writer.add_summary(summary_str, step)
                        break
                bar.update(step / 10 + 1)
            bar.finish()

            # eval on all dev
            dev_batches = batcher.batch_generator(dataset=dataset["dev"],
                                                  num_epochs=1,
                                                  batch_size=len(dataset["dev"]["targets"]),
                                                  sequence_length=parameters["sequence_length"])
            dev_accuracy = 0
            for dev_step, (dev_batch, _) in enumerate(dev_batches):
                feed_dict = {
                    premises_ph: np.transpose(dev_batch["premises"], (1, 0, 2)),
                    hypothesis_ph: np.transpose(dev_batch["hypothesis"], (1, 0, 2)),
                    targets_ph: dev_batch["targets"],
                    keep_prob_ph: 1.,
                }
                summary_str, dev_loss, dev_accuracy = sess.run(
                    [dev_summary_op, loss, accuracy], feed_dict=feed_dict)
                print "\nDEV full | loss={0:.2f}, accuracy={1:.2f}% ".format(dev_loss, 100. * dev_accuracy)
                print ""
                if dev_accuracy > best_dev_accuracy:
                    saver.save(sess, save_path=savepath + '_best', global_step=(epoch + 1) * steps)
                break

            saver.save(sess, save_path=savepath, global_step=(epoch + 1) * steps)
            current_time = time.time()
            print("Iter %3d Loss %-8.3f Dev Acc %-6.2f Time %-5.2f at %s" %
                  (epoch, total_loss, dev_accuracy,
                   (current_time - timestamp) / 60.0, str(datetime.datetime.now())))
            total_loss = 0.0
        print ""
""" make predictions using the network """ import torch import torch.nn import os import dataset from network import RNN dataset = dataset.Dataset() max_length = 20 rnn = RNN(dataset.n_letters, 128, dataset.n_letters, dataset.n_categories) rnn.eval() # load weights if os.path.exists("models/gen_names.pkl"): checkpoint = torch.load("models/gen_names.pkl") rnn.load_state_dict(checkpoint['nn_state_dict']) print("checkpoint loaded") def sample(category, start_char): with torch.no_grad(): category_tensor_var = dataset.category_tensor(category) input = dataset.input_tensor(start_char) hidden = rnn.init_hidden() output_name = start_char
def test_small_lr_case(self):
    alpha = 1

    self.rnn_1 = RNN(self.W_in, self.W_rec, self.W_out,
                     self.b_rec, self.b_out,
                     activation=tanh, alpha=alpha,
                     output=softmax, loss=softmax_cross_entropy)
    self.rnn_2 = RNN(self.W_in, self.W_rec, self.W_out,
                     self.b_rec, self.b_out,
                     activation=tanh, alpha=alpha,
                     output=softmax, loss=softmax_cross_entropy)
    self.rnn_3 = RNN(self.W_in, self.W_rec, self.W_out,
                     self.b_rec, self.b_out,
                     activation=tanh, alpha=alpha,
                     output=softmax, loss=softmax_cross_entropy)

    lr = 0.00001
    self.optimizer_1 = Stochastic_Gradient_Descent(lr=lr)
    self.learn_alg_1 = RTRL(self.rnn_1)
    self.optimizer_2 = Stochastic_Gradient_Descent(lr=lr)
    self.learn_alg_2 = Future_BPTT(self.rnn_2, 25)
    self.optimizer_3 = Stochastic_Gradient_Descent(lr=lr)
    self.learn_alg_3 = Efficient_BPTT(self.rnn_3, 100)

    monitors = []

    np.random.seed(1)
    self.sim_1 = Simulation(self.rnn_1)
    self.sim_1.run(self.data, learn_alg=self.learn_alg_1,
                   optimizer=self.optimizer_1,
                   monitors=monitors, verbose=False)

    np.random.seed(1)
    self.sim_2 = Simulation(self.rnn_2)
    self.sim_2.run(self.data, learn_alg=self.learn_alg_2,
                   optimizer=self.optimizer_2,
                   monitors=monitors, verbose=False)

    np.random.seed(1)
    self.sim_3 = Simulation(self.rnn_3)
    self.sim_3.run(self.data, learn_alg=self.learn_alg_3,
                   optimizer=self.optimizer_3,
                   monitors=monitors, verbose=False)

    # Assert networks learned similar weights with a small tolerance.
    assert_allclose(self.rnn_1.W_rec, self.rnn_2.W_rec, atol=1e-4)
    assert_allclose(self.rnn_2.W_rec, self.rnn_3.W_rec, atol=1e-4)
    # But that there was some difference from initialization
    self.assertFalse(np.isclose(self.rnn_1.W_rec, self.W_rec, atol=1e-4).all())
from options import options

options = options()
opts = options.parse()

# data loader
data_loader = data.dataloader(opts)
train_loader = util.create_dataset([data_loader.train_data, data_loader.train_label],
                                   data_loader.wordIdx, data_loader.labelIdx, opts)

from network import RNN
from train import train
from test import test

'''RNN model'''
RNN = RNN(opts, data_loader.wordIdx, data_loader.labelIdx,
          len(data_loader.labelIdx.items())).to(device)
if opts.print_model:
    print(RNN)

'''Optimizers'''
import torch.optim as optim
RNN_optim = optim.Adam(RNN.parameters(), lr=opts.lr, betas=(opts.beta1, opts.beta2))

'''run training'''
trainer = train(opts, RNN, RNN_optim, train_loader)
trainer.trainer()

test_loader = util.create_dataset([data_loader.test_data, data_loader.test_label],
                                  data_loader.wordIdx,
def test_kernl_reduce_rflo(self):
    """Verifies that KeRNL reduces to RFLO in special case.

    If beta is initialized to the identity while the gammas are all
    initialized to the network inverse time constant alpha, and the KeRNL
    optimizer has 0 learning rate (i.e. beta and gamma do not change), then
    KeRNL should produce the same gradients as RFLO if the approximate
    KeRNL of (1 - alpha) (rather than exp(-alpha)) is used."""

    self.task = Add_Task(4, 6, deterministic=True, tau_task=2)
    self.data = self.task.gen_data(100, 0)

    alpha = 0.3

    self.rnn_1 = RNN(self.W_in, self.W_rec, self.W_out,
                     self.b_rec, self.b_out,
                     activation=tanh, alpha=alpha,
                     output=softmax, loss=softmax_cross_entropy)
    self.rnn_2 = RNN(self.W_in, self.W_rec, self.W_out,
                     self.b_rec, self.b_out,
                     activation=tanh, alpha=alpha,
                     output=softmax, loss=softmax_cross_entropy)

    # RFLO
    np.random.seed(1)
    self.optimizer_1 = Stochastic_Gradient_Descent(lr=0.001)
    self.learn_alg_1 = RFLO(self.rnn_1, alpha)

    # KeRNL with beta and gamma fixed to RFLO values
    np.random.seed(1)
    self.optimizer_2 = Stochastic_Gradient_Descent(lr=0.001)
    self.KeRNL_optimizer = Stochastic_Gradient_Descent(lr=0)
    A = np.eye(self.rnn_2.n_h)
    alpha_i = np.ones(self.rnn_2.n_h) * alpha
    self.learn_alg_2 = KeRNL(self.rnn_2, self.KeRNL_optimizer, A=A, alpha=alpha_i)

    monitors = []

    np.random.seed(2)
    self.sim_1 = Simulation(self.rnn_1)
    self.sim_1.run(self.data, learn_alg=self.learn_alg_1,
                   optimizer=self.optimizer_1,
                   monitors=monitors, verbose=False)

    np.random.seed(2)
    self.sim_2 = Simulation(self.rnn_2)
    self.sim_2.run(self.data, learn_alg=self.learn_alg_2,
                   optimizer=self.optimizer_2,
                   monitors=monitors, verbose=False)

    # Assert networks learned the same weights
    assert_allclose(self.rnn_1.W_rec, self.rnn_2.W_rec)
    # Assert networks' parameters changed appreciably, despite a large
    # tolerance for closeness.
    self.assertFalse(np.isclose(self.W_rec, self.rnn_2.W_rec).all())
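# Side note (not part of the test above): the docstring's "(1 - alpha) rather
# than exp(-alpha)" is the first-order Taylor approximation of the leak factor;
# for the alpha = 0.3 used in the test the two differ by only about 0.04.
import numpy as np

alpha = 0.3
print(np.exp(-alpha))   # ~0.7408
print(1 - alpha)        # 0.7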
transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=opts.batch_size, shuffle=True)
testset = datasets.MNIST('MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False)

from network import RNN
from train import trainer
from test import tester

'''RNN model'''
RNN = RNN(opts).to(device)
if opts.print_model:
    print(RNN)

'''Optimizers'''
import torch.optim as optim
RNN_optim = optim.Adam(RNN.parameters(), lr=opts.lr, betas=(opts.beta1, opts.beta2))

'''Criterion'''
criterion = nn.CrossEntropyLoss()  # the target label is not one-hotted

'''run training'''
trainer(opts, RNN, RNN_optim, criterion, trainloader)

'''test'''
tester(opts, RNN, testloader)
def generate(model_path):
    with open('{}/rnn.pickle'.format(model_path)) as f:
        config = pickle.load(f)
    n = RNN(training=False, **config)
    print n.gen_text(sess=None, model_path=model_path)
options = options()
opts = options.parse()

# data loader
data_loader = data.dataloader(opts)
train_loader = util.create_dataset(data_loader.train_data, data_loader.letteridx,
                                   data_loader.labelidx, opts)
test_loader = util.create_dataset(data_loader.test_data, data_loader.letteridx,
                                  data_loader.labelidx, opts)

from network import RNN
from train import trainer
from test import tester

'''RNN model'''
RNN = RNN(opts, data_loader.letteridx).to(device)
if opts.print_model:
    print(RNN)

'''Optimizers'''
import torch.optim as optim
RNN_optim = optim.Adam(RNN.parameters(), lr=opts.lr, betas=(opts.beta1, opts.beta2))

'''Criterion'''
criterion = nn.NLLLoss()

'''run training'''
trainer(opts, RNN, RNN_optim, criterion, train_loader)

'''test'''
tester(opts, RNN, test_loader)
def train(word2vec, dataset, parameters, class_weights):
    modeldir = os.path.join(parameters["runs_dir"], parameters["model_name"])
    if not os.path.exists(modeldir):
        os.mkdir(modeldir)
    logdir = os.path.join(modeldir, "log")
    if not os.path.exists(logdir):
        os.mkdir(logdir)
    logdir_train = os.path.join(logdir, "train")
    if not os.path.exists(logdir_train):
        os.mkdir(logdir_train)
    logdir_test = os.path.join(logdir, "test")
    if not os.path.exists(logdir_test):
        os.mkdir(logdir_test)
    # logdir_dev = os.path.join(logdir, "dev")
    # if not os.path.exists(logdir_dev):
    #     os.mkdir(logdir_dev)
    savepath = os.path.join(modeldir, "save")

    # device_string = "/gpu:{}".format(parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    device_string = "/cpu:0"
    with tf.device(device_string):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
        sess = tf.Session(config=config_proto)

        headline_ph = tf.placeholder(tf.float32,
                                     shape=[parameters["sequence_length"], None, parameters["embedding_dim"]],
                                     name="headline")
        body_ph = tf.placeholder(tf.float32,
                                 shape=[parameters["sequence_length"], None, parameters["embedding_dim"]],
                                 name="body")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")

        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(filter_height=1,
                                             filter_width=parameters["embedding_dim"],
                                             in_channels=1,
                                             out_channels=parameters["num_units"],
                                             name="projecter")

        optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"],
                                           name="ADAM", beta1=0.9, beta2=0.999)

        with tf.variable_scope(name_or_scope="headline"):
            headline = RNN(cell=LSTMCell, num_units=parameters["num_units"],
                           embedding_dim=parameters["embedding_dim"],
                           projecter=projecter, keep_prob=keep_prob_ph,
                           class_weights=class_weights)
            headline.process(sequence=headline_ph)

        with tf.variable_scope(name_or_scope="body"):
            body = RNN(cell=AttentionLSTMCell, num_units=parameters["num_units"],
                       embedding_dim=parameters["embedding_dim"],
                       hiddens=headline.hiddens, states=headline.states,
                       projecter=projecter, keep_prob=keep_prob_ph,
                       class_weights=class_weights)
            body.process(sequence=body_ph)

        loss, loss_summary, accuracy, accuracy_summary = body.loss(targets=targets_ph)
        weight_decay = tf.reduce_sum([tf.reduce_sum(parameter)
                                      for parameter in headline.parameters + body.parameters])
        global_loss = loss + parameters["weight_decay"] * weight_decay

        train_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        train_summary_writer = tf.summary.FileWriter(logdir_train, sess.graph)
        test_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        test_summary_writer = tf.summary.FileWriter(logdir_test)

        saver = tf.train.Saver(max_to_keep=10)
        summary_writer = tf.summary.FileWriter(logdir)
        tf.train.write_graph(sess.graph_def, modeldir, "graph.pb", as_text=False)
        loader = tf.train.Saver(tf.global_variables())

        optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"],
                                           name="ADAM", beta1=0.9, beta2=0.999)
        train_op = optimizer.minimize(global_loss)
        sess.run(tf.global_variables_initializer())

        batcher = Batcher(word2vec=word2vec)
        train_batches = batcher.batch_generator(dataset=dataset["train"],
                                                num_epochs=parameters["num_epochs"],
                                                batch_size=parameters["batch_size"]["train"],
                                                sequence_length=parameters["sequence_length"])
        num_step_by_epoch = int(math.ceil(len(dataset["train"]["targets"]) / parameters["batch_size"]["train"]))

        for train_step, (train_batch, epoch) in enumerate(train_batches):
            feed_dict = {
                headline_ph: np.transpose(train_batch["headline"], (1, 0, 2)),
                body_ph: np.transpose(train_batch["body"], (1, 0, 2)),
                targets_ph: train_batch["targets"],
                keep_prob_ph: parameters["keep_prob"],
            }
            _, summary_str, train_loss, train_accuracy = sess.run(
                [train_op, train_summary_op, loss, accuracy], feed_dict=feed_dict)
            train_summary_writer.add_summary(summary_str, train_step)
            if train_step % 10 == 0:
                sys.stdout.write("\rTRAIN | epoch={0}/{1}, step={2}/{3} | loss={4:.2f}, accuracy={5:.2f}% ".format(
                    epoch + 1, parameters["num_epochs"], train_step % num_step_by_epoch,
                    num_step_by_epoch, train_loss, 100. * train_accuracy))
                sys.stdout.flush()
            if train_step % 500 == 0:
                test_batches = batcher.batch_generator(dataset=dataset["test"],
                                                       num_epochs=1,
                                                       batch_size=parameters["batch_size"]["test"],
                                                       sequence_length=parameters["sequence_length"])
                for test_step, (test_batch, _) in enumerate(test_batches):
                    feed_dict = {
                        headline_ph: np.transpose(test_batch["headline"], (1, 0, 2)),
                        body_ph: np.transpose(test_batch["body"], (1, 0, 2)),
                        targets_ph: test_batch["targets"],
                        keep_prob_ph: 1.,
                    }
                    summary_str, test_loss, test_accuracy = sess.run(
                        [test_summary_op, loss, accuracy], feed_dict=feed_dict)
                    print "\nTEST | loss={0:.2f}, accuracy={1:.2f}% ".format(test_loss, 100. * test_accuracy)
                    print ""
                    test_summary_writer.add_summary(summary_str, train_step)
                    break
            if train_step % 5000 == 0:
                saver.save(sess, save_path=savepath, global_step=train_step)
        print ""
keys = yaml.load(open("twitter_keys", "r"))
consumer_key = keys["consumer_key"]
consumer_secret = keys["consumer_secret"]
access_token = keys["access_token"]
access_secret = keys["access_secret"]

handler = OAuthHandler(consumer_key, consumer_secret)
handler.set_access_token(access_token, access_secret)

a, b, c, d, inv_vocab = load_dataset()
mx = len(inv_vocab)

api = API(handler)

rnn = RNN(mx, rnn_size, False)
model = L.Classifier(rnn)
serializers.load_hdf5("mymodel.h5", model)

while True:
    nxt = np.random.randint(0, mx)
    result = ""
    for i in range(40):
        nxt = np.array([nxt], np.int32)
        prob = F.softmax(model.predictor(nxt))
        nxt = np.argmax(prob.data)
        s = inv_vocab[nxt]
        if s == "。":
            break
        result += s