Example #1
File: trainer.py  Project: fankib/STGRU
 def prepare(self, loc_count, user_count, hidden_size, gru_factory, device):
     self.hidden_size = hidden_size
     if self.use_user_embedding:
         self.model = RNN_user(loc_count, user_count, hidden_size,
                               gru_factory).to(device)
     else:
         self.model = RNN(loc_count, hidden_size, gru_factory).to(device)
def load_checkpoint(filename, dir=GOOGLE_COLAB_CHECKPOINT_DIR):
    checkpoint = torch.load(os.path.join(dir, filename))
    model = RNN(hidden_size=checkpoint['hidden_size'],
                output_size=checkpoint['output_size'],
                n_layers=checkpoint['n_layers'],
                batch_size=checkpoint['batch_size'],
                bidirectional=checkpoint['bidirectional'])
    model.load_state_dict(checkpoint['state_dict'])
    model.num_epochs_trained = checkpoint['num_epochs_trained']

    return model
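For reference, a matching save-side helper would have to write exactly the keys that load_checkpoint reads back. A minimal sketch, assuming the torch/os imports and GOOGLE_COLAB_CHECKPOINT_DIR from the snippet above, and assuming the model exposes its hyperparameters as attributes with these names (the helper itself is not part of the original project):

def save_checkpoint(model, filename, dir=GOOGLE_COLAB_CHECKPOINT_DIR):
    # Persist the hyperparameters that load_checkpoint expects alongside the weights.
    checkpoint = {
        'hidden_size': model.hidden_size,          # attribute names are assumptions
        'output_size': model.output_size,
        'n_layers': model.n_layers,
        'batch_size': model.batch_size,
        'bidirectional': model.bidirectional,
        'state_dict': model.state_dict(),
        'num_epochs_trained': model.num_epochs_trained,
    }
    torch.save(checkpoint, os.path.join(dir, filename))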
Example #3
def main():
    with open('data/wsj00-18.pos') as f:
        training_data = f.readlines()

    with open('data/wsj19-21.pos') as f:
        test_data = f.readlines()

    model = word2vec.KeyedVectors.load_word2vec_format(
        './data/GoogleNews-vectors-negative300.bin', binary=True)

    print(model.wv.most_similar(positive=['woman', 'king'], negative=['man']))
    # print(model['hgoehgoehgoehg'])
    # print(len(model['hogehgoehgoe']))

    labels = ('NNP', ',', 'CD', 'NNS', 'JJ', 'MD', 'VB', 'DT', 'NN', 'IN', '.',
              'VBZ', 'VBG', 'CC', 'VBD', 'VBN', 'RB', 'TO', 'PRP', 'RBR',
              'WDT', 'VBP', 'RP', 'PRP$', 'JJS', 'POS', '``', 'EX', "''", 'WP',
              ':', 'JJR', 'WRB', '$', 'NNPS', 'WP$', '-LRB-', '-RRB-', 'PDT',
              'RBS', 'FW', 'UH', 'SYM', 'LS', '#')

    rnn = RNN(300, 1000, labels)

    training_vector_data = [line for line in training_data]

    test_vector_data = [line for line in test_data]

    manager = NetworkEvaluator(rnn, training_vector_data, test_vector_data)
def run(checkpoint=None, dir=CHECKPOINT_DIR):
    gc.collect()
    batch_size = 1
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # torch.autograd.set_detect_anomaly(True)
    clean_train_loader = DataLoader(clean_train_data,
                                    batch_size=batch_size,
                                    shuffle=True,
                                    num_workers=2,
                                    pin_memory=True)
    clean_test_loader = DataLoader(clean_test_data,
                                   batch_size=batch_size,
                                   shuffle=False,
                                   num_workers=2,
                                   pin_memory=True)

    if checkpoint:
        model = load_checkpoint(checkpoint, dir=dir).to(device)
    else:
        model = RNN(hidden_size=80,
                    output_size=5,
                    n_layers=2,
                    batch_size=batch_size,
                    bidirectional=False).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss = torch.nn.CrossEntropyLoss()

    losses = train(clean_train_loader,
                   clean_test_loader,
                   500,
                   model,
                   optimizer,
                   loss,
                   device,
                   checkpoint_dir=CHECKPOINT_DIR)
    return model, losses
Example #5
File: trainer.py  Project: fankib/STGRU
class BprTrainer(Trainer):
    def greeter(self):
        if not self.use_user_embedding:
            return 'Use BPR training.'
        return 'Use BPR training with user embeddings.'

    def prepare(self, loc_count, user_count, hidden_size, gru_factory, device):
        self.hidden_size = hidden_size
        if self.use_user_embedding:
            self.model = RNN_user(loc_count, user_count, hidden_size,
                                  gru_factory).to(device)
        else:
            self.model = RNN(loc_count, hidden_size, gru_factory).to(device)

    def evaluate(self, x, t, s, y_t, y_s, h, active_users):
        seq_length = x.shape[0]
        user_length = x.shape[1]
        out, h = self.model(x, h, active_users)
        out_t = out.transpose(0, 1)
        response = []
        Q = self.model.encoder.weight
        for j in range(user_length):
            out_j = out_t[j].transpose(0, 1)
            o = torch.matmul(Q, out_j).cpu().detach()
            o = o.transpose(0, 1)
            o = o.contiguous().view(seq_length, -1)
            response.append(o)
        return response, h

    def loss(self, x, t, s, y, y_t, y_s, h, active_users):
        out, h = self.model(x, h, active_users)
        y_emb = self.model.encoder(y)

        # reshape
        out = out.view(-1, self.hidden_size)
        out_t = out.transpose(0, 1)
        y_emb = y_emb.contiguous().view(-1, self.hidden_size)
        Q = self.model.encoder.weight

        neg_o = torch.matmul(Q, out_t)
        pos_o = torch.matmul(y_emb, out_t).diag()

        l = torch.log(1 + torch.exp(-(pos_o - neg_o)))
        l = torch.mean(l)
        return l, h
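The expression log(1 + exp(-(pos_o - neg_o))) in the loss above is the softplus of the negative score margin, so it can be checked against torch.nn.functional.softplus on toy tensors. A standalone sketch with made-up values, not taken from the trainer:

import torch
import torch.nn.functional as F

pos_o = torch.tensor([2.0, 0.5, -1.0])                # scores of observed (positive) items
neg_o = torch.tensor([1.0, 1.5, -2.0])                # scores of sampled negatives
manual = torch.log(1 + torch.exp(-(pos_o - neg_o)))   # form used in BprTrainer.loss
stable = F.softplus(neg_o - pos_o)                    # equivalent, numerically safer form
print(torch.allclose(manual, stable))                 # True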
class RESTService:
    def __init__(self, host='0.0.0.0', port=8080):
        self._host = host
        self._port = port
        self._model = RNN()
        self._app = Bottle()
        self._route()

    def _route(self):
        self._app.route('/predict', method="POST", callback=self._predict)

    def _predict(self):
        data = request.json
        category = self._model.predict(data)
        response.content_type = 'text/plain'
        return str(category)

    def start(self):
        self._app.run(host=self._host, port=self._port)
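A minimal client for the /predict route could look like the following; the host, port, and payload shape are assumptions about how RNN.predict consumes the posted JSON:

import requests

resp = requests.post('http://localhost:8080/predict',
                     json={'text': 'some input for the model'})  # payload keys are hypothetical
print(resp.text)  # the predicted category, returned as plain text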
    def setUpClass(cls):

        cls.W_in = np.eye(2)
        cls.W_rec = np.eye(2)
        cls.W_out = np.eye(2)
        cls.W_FB = -np.ones((2, 2)) + np.eye(2)
        cls.b_rec = np.zeros(2)
        cls.b_out = np.zeros(2)

        cls.rnn = RNN(cls.W_in,
                      cls.W_rec,
                      cls.W_out,
                      cls.b_rec,
                      cls.b_out,
                      activation=identity,
                      alpha=1,
                      output=softmax,
                      loss=softmax_cross_entropy)

        cls.rnn.a = np.ones(2)
        cls.rnn.error = np.ones(2) * 0.5
    def test_mimic_task(self):
        """Verifies that the proper RNN output is returned as label in a simple
        case where the RNN simply counts the number of time steps."""

        from network import RNN
        from functions import identity, mean_squared_error

        n_in = 2
        n_h = 2
        n_out = 2

        W_in_target = np.eye(n_in)
        W_rec_target = np.eye(n_h)
        W_out_target = np.eye(n_out)
        b_rec_target = np.zeros(n_h)
        b_out_target = np.zeros(n_out)

        alpha = 1

        rnn_target = RNN(W_in_target,
                         W_rec_target,
                         W_out_target,
                         b_rec_target,
                         b_out_target,
                         activation=identity,
                         alpha=alpha,
                         output=identity,
                         loss=mean_squared_error)

        task = Mimic_RNN(rnn_target, p_input=1, tau_task=1)
        data = task.gen_data(100, 0)

        y = np.arange(1, 101)
        y_correct = np.array([y, y]).T

        self.assertTrue(np.isclose(data['train']['Y'], y_correct).all())
Example #9
def train_rnn(file, batch_size, layers, learning_rate, dropout, num_steps,
              cell_size, epochs, cell, test_seed, delim, save):
    """ Train neural network """
    model_name = "cell-{}-size-{}-batch-{}-steps-{}-layers-{}-lr-{}-dropout-{}".format(
        cell, cell_size, batch_size, num_steps, layers, learning_rate, dropout)
    ds = Dataset(file,
                 batch_size=batch_size,
                 num_steps=num_steps,
                 with_delim=delim)
    n = RNN(data=ds,
            cell=cell,
            num_layers=layers,
            dropout=dropout,
            learning_rate=learning_rate,
            cell_size=cell_size,
            num_epochs=epochs)
    n.train(save=save,
            model_name=model_name,
            test_output=True,
            test_seed=test_seed,
            with_delim=delim)
    if save:
        n.save(model_name)
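A call site for train_rnn would pass all of these options explicitly; every value below is an illustrative placeholder rather than a setting from the original project:

train_rnn(file='data/corpus.txt',   # hypothetical path
          batch_size=32,
          layers=2,
          learning_rate=0.001,
          dropout=0.5,
          num_steps=50,
          cell_size=256,
          epochs=10,
          cell='lstm',
          test_seed='The ',
          delim=True,
          save=True)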
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
from torch.autograd import Variable
from dataset import dataset, dataset2
from network import RNN
import os
model_dir = '/home/lixiaoyu/project/airQuality/Analysis-of-Air-Quality-and-Outpatient-Quantity/ckpt/'

TIME_STEP = 120
INPUT_SIZE = 7
HIDDEN_SIZE = 32
LR = 0.01
EPOCH = 1000

rnn = RNN(INPUT_SIZE=INPUT_SIZE, HIDDEN_SIZE=HIDDEN_SIZE)
optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)
loss_func = nn.MSELoss()

LoadModel = False


def train(train_loader, num_e):
    torch.manual_seed(1)
    if LoadModel:
        checkpoint = torch.load(model_dir + '{}.ckpt'.format(num_e))
        rnn.load_state_dict(checkpoint['state_dict'])
        print('Loading model~~~~~~~~~~', num_e)

    for e in range(EPOCH):
        print('epoch>>>>>>> ', e)
Example #12
def test(word2vec, dataset, parameters, loadpath):
    print "1"
    device_string = "/gpu:{}".format(
        parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    with tf.device(device_string):
        print "2"
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True,
                                      gpu_options=gpu_options)

        sess = tf.Session(config=config_proto)

        premises_ph = tf.placeholder(tf.float32,
                                     shape=[
                                         parameters["sequence_length"], None,
                                         parameters["embedding_dim"]
                                     ],
                                     name="premises")
        hypothesis_ph = tf.placeholder(tf.float32,
                                       shape=[
                                           parameters["sequence_length"], None,
                                           parameters["embedding_dim"]
                                       ],
                                       name="hypothesis")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")

        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(
            filter_height=1,
            filter_width=parameters["embedding_dim"],
            in_channels=1,
            out_channels=parameters["num_units"],
            name="projecter")

        with tf.variable_scope(name_or_scope="premise"):
            premise = RNN(cell=LSTMCell,
                          num_units=parameters["num_units"],
                          embedding_dim=parameters["embedding_dim"],
                          projecter=projecter,
                          keep_prob=keep_prob_ph)
            premise.process(sequence=premises_ph)

        with tf.variable_scope(name_or_scope="hypothesis"):
            hypothesis = RNN(cell=AttentionLSTMCell,
                             num_units=parameters["num_units"],
                             embedding_dim=parameters["embedding_dim"],
                             hiddens=premise.hiddens,
                             states=premise.states,
                             projecter=projecter,
                             keep_prob=keep_prob_ph)
            hypothesis.process(sequence=hypothesis_ph)

        loss, loss_summary, accuracy, accuracy_summary = hypothesis.loss(
            targets=targets_ph)

        loader = tf.train.Saver()
        loader.restore(sess, loadpath)

        batcher = Batcher(word2vec=word2vec, settings=parameters)
        test_batches = batcher.batch_generator(
            dataset=dataset["test"],
            num_epochs=1,
            batch_size=parameters["batch_size"]["test"],
            sequence_length=parameters["sequence_length"])
        print "2.5"
        for test_step, (test_batch, _) in enumerate(test_batches):
            print "3"
            feed_dict = {
                premises_ph: np.transpose(test_batch["premises"], (1, 0, 2)),
                hypothesis_ph: np.transpose(test_batch["hypothesis"],
                                            (1, 0, 2)),
                targets_ph: test_batch["targets"],
                keep_prob_ph: 1.,
            }

            test_loss, test_accuracy = sess.run([loss, accuracy],
                                                feed_dict=feed_dict)
            print "\nTEST | loss={0:.2f}, accuracy={1:.2f}%   ".format(
                test_loss, 100. * test_accuracy)
            print ""
Example #13
training_loader = DataLoader(training_set, args.batch_size, shuffle=True)

validation_data, validation_labels = joblib.load('%s/validation-%d.data' % (args.datapath, args.n))
validation_data = th.from_numpy(validation_data)
validation_labels = onehot_sequence(th.from_numpy(validation_labels), 10, cuda)
validation_set = TensorDataset(validation_data, validation_labels)
validation_loader = DataLoader(validation_set, args.batch_size)

test_data, test_labels = joblib.load('%s/test-%d.data' % (args.datapath, args.n))
test_data = th.from_numpy(test_data)
test_labels = onehot_sequence(th.from_numpy(test_labels), 10, cuda)
test_set = TensorDataset(test_data, test_labels)
test_loader = DataLoader(test_set, args.batch_size)

cnn_path = args.pretrained_cnn_path if args.pretrained_cnn else None
model = RNN(args.n_units, 10, cnn_path, cuda)
if args.gpu > -1:
  model.cuda()
if args.criterion == 'regression_loss':
  from criterions import regression_loss
  criterion = regression_loss(args.entropy_scale)
else:
  criterion = getattr(__import__('criterions'), args.criterion)()
if args.gpu > -1:
  criterion.cuda()
optimizer = Adam(model.parameters(), lr=1e-3)
vis = visdom.Visdom()
tb_path = args.tensorboard_path
if args.tensorboard_log:
  tb_path += '/%s' % args.tensorboard_log
TensorboardVisualizer.configure(tb_path)
def train(word2vec, dataset, parameters):
    modeldir = os.path.join(parameters["runs_dir"], parameters["model_name"])
    if not os.path.exists(modeldir):
        os.mkdir(modeldir)
    logdir = os.path.join(modeldir, "log")
    if not os.path.exists(logdir):
        os.mkdir(logdir)
    logdir_train = os.path.join(logdir, "train")
    if not os.path.exists(logdir_train):
        os.mkdir(logdir_train)
    logdir_test = os.path.join(logdir, "test")
    if not os.path.exists(logdir_test):
        os.mkdir(logdir_test)
    logdir_dev = os.path.join(logdir, "dev")
    if not os.path.exists(logdir_dev):
        os.mkdir(logdir_dev)
    savepath = os.path.join(modeldir, "save")

    device_string = "/gpu:{}".format(parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    with tf.device(device_string):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)

        sess = tf.Session(config=config_proto)

        premises_ph = tf.placeholder(tf.float32, shape=[parameters["sequence_length"], None, parameters["embedding_dim"]], name="premises")
        hypothesis_ph = tf.placeholder(tf.float32, shape=[parameters["sequence_length"], None, parameters["embedding_dim"]], name="hypothesis")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")

        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(filter_height=1, filter_width=parameters["embedding_dim"], in_channels=1, out_channels=parameters["num_units"], name="projecter")

        optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"], name="ADAM", beta1=0.9, beta2=0.999)
        
        with tf.variable_scope(name_or_scope="premise"):
            premise = RNN(cell=LSTMCell, num_units=parameters["num_units"], embedding_dim=parameters["embedding_dim"], projecter=projecter, keep_prob=keep_prob_ph)
            premise.process(sequence=premises_ph)

        with tf.variable_scope(name_or_scope="hypothesis"):
            hypothesis = RNN(cell=AttentionLSTMCell, num_units=parameters["num_units"], embedding_dim=parameters["embedding_dim"], hiddens=premise.hiddens, states=premise.states, projecter=projecter, keep_prob=keep_prob_ph)
            hypothesis.process(sequence=hypothesis_ph)

        loss, loss_summary, accuracy, accuracy_summary = hypothesis.loss(targets=targets_ph)

        weight_decay = tf.reduce_sum([tf.reduce_sum(parameter) for parameter in premise.parameters + hypothesis.parameters])

        global_loss = loss + parameters["weight_decay"] * weight_decay

        train_summary_op = tf.merge_summary([loss_summary, accuracy_summary])
        train_summary_writer = tf.train.SummaryWriter(logdir_train, sess.graph)
        test_summary_op = tf.merge_summary([loss_summary, accuracy_summary])
        test_summary_writer = tf.train.SummaryWriter(logdir_test)
        
        saver = tf.train.Saver(max_to_keep=10)
        summary_writer = tf.train.SummaryWriter(logdir)
        tf.train.write_graph(sess.graph_def, modeldir, "graph.pb", as_text=False)
        loader = tf.train.Saver(tf.all_variables())

        optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"], name="ADAM", beta1=0.9, beta2=0.999)
        train_op = optimizer.minimize(global_loss)

        sess.run(tf.initialize_all_variables())
        
        batcher = Batcher(word2vec=word2vec)
        train_batches = batcher.batch_generator(dataset=dataset["train"], num_epochs=parameters["num_epochs"], batch_size=parameters["batch_size"]["train"], sequence_length=parameters["sequence_length"])
        num_step_by_epoch = int(math.ceil(len(dataset["train"]["targets"]) / parameters["batch_size"]["train"]))
        for train_step, (train_batch, epoch) in enumerate(train_batches):
            feed_dict = {
                            premises_ph: np.transpose(train_batch["premises"], (1, 0, 2)),
                            hypothesis_ph: np.transpose(train_batch["hypothesis"], (1, 0, 2)),
                            targets_ph: train_batch["targets"],
                            keep_prob_ph: parameters["keep_prob"],
                        }

            _, summary_str, train_loss, train_accuracy = sess.run([train_op, train_summary_op, loss, accuracy], feed_dict=feed_dict)
            train_summary_writer.add_summary(summary_str, train_step)
            if train_step % 100 == 0:
                sys.stdout.write("\rTRAIN | epoch={0}/{1}, step={2}/{3} | loss={4:.2f}, accuracy={5:.2f}%   ".format(epoch + 1, parameters["num_epochs"], train_step % num_step_by_epoch, num_step_by_epoch, train_loss, 100. * train_accuracy))
                sys.stdout.flush()
            if train_step % 5000 == 0:
                test_batches = batcher.batch_generator(dataset=dataset["test"], num_epochs=1, batch_size=parameters["batch_size"]["test"], sequence_length=parameters["sequence_length"])
                for test_step, (test_batch, _) in enumerate(test_batches):
                    feed_dict = {
                                    premises_ph: np.transpose(test_batch["premises"], (1, 0, 2)),
                                    hypothesis_ph: np.transpose(test_batch["hypothesis"], (1, 0, 2)),
                                    targets_ph: test_batch["targets"],
                                    keep_prob_ph: 1.,
                                }

                    summary_str, test_loss, test_accuracy = sess.run([test_summary_op, loss, accuracy], feed_dict=feed_dict)
                    print"\nTEST | loss={0:.2f}, accuracy={1:.2f}%   ".format(test_loss, 100. * test_accuracy)
                    print ""
                    test_summary_writer.add_summary(summary_str, train_step)
                    break
            if train_step % 5000 == 0:
                saver.save(sess, save_path=savepath, global_step=train_step)
        print ""
Example #15
def train(word2vec, dataset, parameters):
    modeldir = os.path.join(parameters["runs_dir"], parameters["model_name"])
    if not os.path.exists(modeldir):
        os.mkdir(modeldir)
    logdir = os.path.join(modeldir, "log")
    if not os.path.exists(logdir):
        os.mkdir(logdir)
    logdir_train = os.path.join(logdir, "train")
    if not os.path.exists(logdir_train):
        os.mkdir(logdir_train)
    logdir_test = os.path.join(logdir, "test")
    if not os.path.exists(logdir_test):
        os.mkdir(logdir_test)
    logdir_dev = os.path.join(logdir, "dev")
    if not os.path.exists(logdir_dev):
        os.mkdir(logdir_dev)
    savepath = os.path.join(modeldir, "save")

    device_string = "/gpu:{}".format(
        parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    with tf.device(device_string):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True,
                                      gpu_options=gpu_options)

        sess = tf.Session(config=config_proto)

        premises_ph = tf.placeholder(tf.float32,
                                     shape=[
                                         parameters["sequence_length"], None,
                                         parameters["embedding_dim"]
                                     ],
                                     name="premises")
        hypothesis_ph = tf.placeholder(tf.float32,
                                       shape=[
                                           parameters["sequence_length"], None,
                                           parameters["embedding_dim"]
                                       ],
                                       name="hypothesis")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")

        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(
            filter_height=1,
            filter_width=parameters["embedding_dim"],
            in_channels=1,
            out_channels=parameters["num_units"],
            name="projecter")

        # optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"], name="ADAM", beta1=0.9, beta2=0.999)
        with tf.variable_scope(name_or_scope="premise"):
            premise = RNN(cell=LSTMCell,
                          num_units=parameters["num_units"],
                          embedding_dim=parameters["embedding_dim"],
                          projecter=projecter,
                          keep_prob=keep_prob_ph)
            premise.process(sequence=premises_ph)

        with tf.variable_scope(name_or_scope="hypothesis"):
            hypothesis = RNN(cell=AttentionLSTMCell,
                             num_units=parameters["num_units"],
                             embedding_dim=parameters["embedding_dim"],
                             hiddens=premise.hiddens,
                             states=premise.states,
                             projecter=projecter,
                             keep_prob=keep_prob_ph)
            hypothesis.process(sequence=hypothesis_ph)

        loss, loss_summary, accuracy, accuracy_summary = hypothesis.loss(
            targets=targets_ph)

        weight_decay = tf.reduce_sum([
            tf.reduce_sum(parameter)
            for parameter in premise.parameters + hypothesis.parameters
        ])

        global_loss = loss + parameters["weight_decay"] * weight_decay

        train_summary_op = tf.merge_summary([loss_summary, accuracy_summary])
        # train_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        train_summary_writer = tf.train.SummaryWriter(logdir_train, sess.graph)
        # train_summary_writer = tf.summary.FileWriter(logdir_train, sess.graph)
        # test_summary_op = tf.merge_summary([loss_summary, accuracy_summary])
        dev_summary_op = tf.merge_summary([loss_summary, accuracy_summary])
        # test_summary_writer = tf.train.SummaryWriter(logdir_test)
        dev_summary_writer = tf.train.SummaryWriter(logdir_dev)

        saver = tf.train.Saver(max_to_keep=10)
        # summary_writer = tf.train.SummaryWriter(logdir)
        tf.train.write_graph(sess.graph_def,
                             modeldir,
                             "graph.pb",
                             as_text=False)

        optimizer = tf.train.AdamOptimizer(
            learning_rate=parameters["learning_rate"],
            name="ADAM",
            beta1=0.9,
            beta2=0.999)
        train_op = optimizer.minimize(global_loss)

        sess.run(tf.initialize_all_variables())
        # sess.run(tf.global_variables_initializer())

        batcher = Batcher(word2vec=word2vec, settings=parameters)
        #train_split = "train"
        #train_batches = batcher.batch_generator(dataset=dataset[train_split], num_epochs=parameters["num_epochs"],
        # batch_size=parameters["batch_size"]["train"],
        # sequence_length=parameters["sequence_length"])
        #print("train data size: %d" % len(dataset["train"]["targets"]))
        #num_step_by_epoch = int(math.ceil(len(dataset[train_split]["targets"]) / parameters["batch_size"]["train"]))
        #best_dev_accuracy = 0
        print("train data size: %d" % len(dataset["train"]["targets"]))
        best_dev_accuracy = 0.0
        total_loss = 0.0
        timestamp = time.time()
        for epoch in range(parameters["num_epochs"]):
            print("epoch %d" % epoch)
            train_batches = batcher.batch_generator(
                dataset=dataset["train"],
                num_epochs=1,
                batch_size=parameters["batch_size"]["train"],
                sequence_length=parameters["sequence_length"])
            steps = len(dataset["train"]
                        ["targets"]) / parameters["batch_size"]["train"]

            # progress bar http://stackoverflow.com/a/3002114
            bar = progressbar.ProgressBar(maxval=steps / 10 + 1,
                                          widgets=[
                                              progressbar.Bar('=', '[', ']'),
                                              ' ',
                                              progressbar.Percentage()
                                          ])
            bar.start()
            for step, (train_batch, train_epoch) in enumerate(train_batches):
                feed_dict = {
                    premises_ph:
                    np.transpose(train_batch["premises"], (1, 0, 2)),
                    hypothesis_ph:
                    np.transpose(train_batch["hypothesis"], (1, 0, 2)),
                    targets_ph:
                    train_batch["targets"],
                    keep_prob_ph:
                    parameters["keep_prob"],
                }
                _, summary_str, train_loss, train_accuracy = sess.run(
                    [train_op, train_summary_op, loss, accuracy],
                    feed_dict=feed_dict)
                total_loss += train_loss
                train_summary_writer.add_summary(summary_str, step)
                if step % 100 == 0:  # eval 1 random dev batch
                    # eval 1 random dev batch
                    dev_batches = batcher.batch_generator(
                        dataset=dataset["dev"],
                        num_epochs=1,
                        batch_size=parameters["batch_size"]["dev"],
                        sequence_length=parameters["sequence_length"])
                    for dev_step, (dev_batch, _) in enumerate(dev_batches):
                        feed_dict = {
                            premises_ph:
                            np.transpose(dev_batch["premises"], (1, 0, 2)),
                            hypothesis_ph:
                            np.transpose(dev_batch["hypothesis"], (1, 0, 2)),
                            targets_ph:
                            dev_batch["targets"],
                            keep_prob_ph:
                            1.,
                        }

                        summary_str, dev_loss, dev_accuracy = sess.run(
                            [dev_summary_op, loss, accuracy],
                            feed_dict=feed_dict)
                        dev_summary_writer.add_summary(summary_str, step)
                        break
                    bar.update(step / 10 + 1)
            bar.finish()
            # eval on all dev
            dev_batches = batcher.batch_generator(
                dataset=dataset["dev"],
                num_epochs=1,
                batch_size=len(dataset["dev"]["targets"]),
                sequence_length=parameters["sequence_length"])
            dev_accuracy = 0
            for dev_step, (dev_batch, _) in enumerate(dev_batches):
                feed_dict = {
                    premises_ph: np.transpose(dev_batch["premises"],
                                              (1, 0, 2)),
                    hypothesis_ph: np.transpose(dev_batch["hypothesis"],
                                                (1, 0, 2)),
                    targets_ph: dev_batch["targets"],
                    keep_prob_ph: 1.,
                }
                summary_str, dev_loss, dev_accuracy = sess.run(
                    [dev_summary_op, loss, accuracy], feed_dict=feed_dict)
                print "\nDEV full | loss={0:.2f}, accuracy={1:.2f}%   ".format(
                    dev_loss, 100. * dev_accuracy)
                print ""
                if dev_accuracy > best_dev_accuracy:
                    saver.save(sess,
                               save_path=savepath + '_best',
                               global_step=(epoch + 1) * steps)
                break
            saver.save(sess,
                       save_path=savepath,
                       global_step=(epoch + 1) * steps)
            current_time = time.time()
            print("Iter %3d  Loss %-8.3f  Dev Acc %-6.2f  Time %-5.2f at %s" %
                  (epoch, total_loss, dev_accuracy,
                   (current_time - timestamp) / 60.0,
                   str(datetime.datetime.now())))
            total_loss = 0.0
        print ""
Example #16
""" make predictions using the network
"""

import torch
import torch.nn

import os

import dataset
from network import RNN

dataset = dataset.Dataset()

max_length = 20

rnn = RNN(dataset.n_letters, 128, dataset.n_letters, dataset.n_categories)
rnn.eval()

# load weights
if os.path.exists("models/gen_names.pkl"):
    checkpoint = torch.load("models/gen_names.pkl")
    rnn.load_state_dict(checkpoint['nn_state_dict'])
    print("checkpoint loaded")


def sample(category, start_char):
    with torch.no_grad():
        category_tensor_var = dataset.category_tensor(category)
        input = dataset.input_tensor(start_char)
        hidden = rnn.init_hidden()
        output_name = start_char
    def test_small_lr_case(self):

        alpha = 1

        self.rnn_1 = RNN(self.W_in,
                         self.W_rec,
                         self.W_out,
                         self.b_rec,
                         self.b_out,
                         activation=tanh,
                         alpha=alpha,
                         output=softmax,
                         loss=softmax_cross_entropy)

        self.rnn_2 = RNN(self.W_in,
                         self.W_rec,
                         self.W_out,
                         self.b_rec,
                         self.b_out,
                         activation=tanh,
                         alpha=alpha,
                         output=softmax,
                         loss=softmax_cross_entropy)

        self.rnn_3 = RNN(self.W_in,
                         self.W_rec,
                         self.W_out,
                         self.b_rec,
                         self.b_out,
                         activation=tanh,
                         alpha=alpha,
                         output=softmax,
                         loss=softmax_cross_entropy)

        lr = 0.00001
        self.optimizer_1 = Stochastic_Gradient_Descent(lr=lr)
        self.learn_alg_1 = RTRL(self.rnn_1)
        self.optimizer_2 = Stochastic_Gradient_Descent(lr=lr)
        self.learn_alg_2 = Future_BPTT(self.rnn_2, 25)
        self.optimizer_3 = Stochastic_Gradient_Descent(lr=lr)
        self.learn_alg_3 = Efficient_BPTT(self.rnn_3, 100)

        monitors = []

        np.random.seed(1)
        self.sim_1 = Simulation(self.rnn_1)
        self.sim_1.run(self.data,
                       learn_alg=self.learn_alg_1,
                       optimizer=self.optimizer_1,
                       monitors=monitors,
                       verbose=False)

        np.random.seed(1)
        self.sim_2 = Simulation(self.rnn_2)
        self.sim_2.run(self.data,
                       learn_alg=self.learn_alg_2,
                       optimizer=self.optimizer_2,
                       monitors=monitors,
                       verbose=False)

        np.random.seed(1)
        self.sim_3 = Simulation(self.rnn_3)
        self.sim_3.run(self.data,
                       learn_alg=self.learn_alg_3,
                       optimizer=self.optimizer_3,
                       monitors=monitors,
                       verbose=False)

        #Assert networks learned similar weights with a small tolerance.
        assert_allclose(self.rnn_1.W_rec, self.rnn_2.W_rec, atol=1e-4)
        assert_allclose(self.rnn_2.W_rec, self.rnn_3.W_rec, atol=1e-4)
        #But that there was some difference from initialization
        self.assertFalse(
            np.isclose(self.rnn_1.W_rec, self.W_rec, atol=1e-4).all())
Example #18
from options import options
options = options()
opts = options.parse()

#data loader
data_loader = data.dataloader(opts)

train_loader = util.create_dataset(
    [data_loader.train_data, data_loader.train_label], data_loader.wordIdx,
    data_loader.labelIdx, opts)

from network import RNN
from train import train
from test import test
'''RNN model'''
RNN = RNN(opts, data_loader.wordIdx, data_loader.labelIdx,
          len(data_loader.labelIdx.items())).to(device)

if opts.print_model:
    print(RNN)
'''Optimizers'''
import torch.optim as optim

RNN_optim = optim.Adam(RNN.parameters(),
                       lr=opts.lr,
                       betas=(opts.beta1, opts.beta2))
'''run training'''
trainer = train(opts, RNN, RNN_optim, train_loader)
trainer.trainer()

test_loader = util.create_dataset(
    [data_loader.test_data, data_loader.test_label], data_loader.wordIdx,
    data_loader.labelIdx, opts)
    def test_kernl_reduce_rflo(self):
        """Verifies that KeRNL reduces to RFLO in special case.

        If beta is initialized to the identity while the gammas are all
        initialized to the network inverse time constant alpha, and the KeRNL
        optimizer has 0 learning rate (i.e. beta and gamma do not change), then
        KeRNL should produce the same gradients as RFLO if the approximate
        KeRNL of (1 - alpha) (rather than exp(-alpha)) is used."""

        self.task = Add_Task(4, 6, deterministic=True, tau_task=2)
        self.data = self.task.gen_data(100, 0)

        alpha = 0.3

        self.rnn_1 = RNN(self.W_in,
                         self.W_rec,
                         self.W_out,
                         self.b_rec,
                         self.b_out,
                         activation=tanh,
                         alpha=alpha,
                         output=softmax,
                         loss=softmax_cross_entropy)

        self.rnn_2 = RNN(self.W_in,
                         self.W_rec,
                         self.W_out,
                         self.b_rec,
                         self.b_out,
                         activation=tanh,
                         alpha=alpha,
                         output=softmax,
                         loss=softmax_cross_entropy)

        #RFLO
        np.random.seed(1)
        self.optimizer_1 = Stochastic_Gradient_Descent(lr=0.001)
        self.learn_alg_1 = RFLO(self.rnn_1, alpha)
        #KeRNL with beta and gamma fixed to RFLO values
        np.random.seed(1)
        self.optimizer_2 = Stochastic_Gradient_Descent(lr=0.001)
        self.KeRNL_optimizer = Stochastic_Gradient_Descent(lr=0)
        A = np.eye(self.rnn_2.n_h)
        alpha_i = np.ones(self.rnn_2.n_h) * alpha
        self.learn_alg_2 = KeRNL(self.rnn_2,
                                 self.KeRNL_optimizer,
                                 A=A,
                                 alpha=alpha_i)

        monitors = []

        np.random.seed(2)
        self.sim_1 = Simulation(self.rnn_1)
        self.sim_1.run(self.data,
                       learn_alg=self.learn_alg_1,
                       optimizer=self.optimizer_1,
                       monitors=monitors,
                       verbose=False)

        np.random.seed(2)
        self.sim_2 = Simulation(self.rnn_2)
        self.sim_2.run(self.data,
                       learn_alg=self.learn_alg_2,
                       optimizer=self.optimizer_2,
                       monitors=monitors,
                       verbose=False)

        #Assert networks learned the same weights
        assert_allclose(self.rnn_1.W_rec, self.rnn_2.W_rec)
        #Assert networks' parameters changed appreciably, despite a large
        #tolerance for closeness.
        self.assertFalse(np.isclose(self.W_rec, self.rnn_2.W_rec).all())
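For context, the (1 - alpha) mentioned in the docstring is the first-order Taylor approximation of the exponential decay, exp(-alpha) = 1 - alpha + alpha^2/2 - ..., so substituting it for exp(-alpha) is reasonable when the inverse time constant alpha is small; under that substitution the KeRNL and RFLO updates coincide, which is what the test asserts.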
Example #20
trainset = datasets.MNIST('MNIST_data/',
                          download=True,
                          train=True,
                          transform=transform)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=opts.batch_size,
                                          shuffle=True)

testset = datasets.MNIST('MNIST_data/',
                         download=True,
                         train=False,
                         transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False)

from network import RNN
from train import trainer
from test import tester
'''RNN model'''
RNN = RNN(opts).to(device)

if opts.print_model:
    print(RNN)
'''Optimizers'''
import torch.optim as optim

RNN_optim = optim.Adam(RNN.parameters(),
                       lr=opts.lr,
                       betas=(opts.beta1, opts.beta2))
'''Criterion'''
criterion = nn.CrossEntropyLoss()  # the target label is not one-hotted
'''run training'''
trainer(opts, RNN, RNN_optim, criterion, trainloader)
'''test'''
tester(opts, RNN, testloader)
Example #21
def generate(model_path):
    with open('{}/rnn.pickle'.format(model_path), 'rb') as f:
        config = pickle.load(f)
    n = RNN(training=False, **config)
    print(n.gen_text(sess=None, model_path=model_path))
Example #22
File: main.py  Project: yk287/NLP
options = options()
opts = options.parse()

#data loader
data_loader = data.dataloader(opts)
train_loader = util.create_dataset(data_loader.train_data,
                                   data_loader.letteridx, data_loader.labelidx,
                                   opts)
test_loader = util.create_dataset(data_loader.test_data, data_loader.letteridx,
                                  data_loader.labelidx, opts)

from network import RNN
from train import trainer
from test import tester
'''RNN model'''
RNN = RNN(opts, data_loader.letteridx).to(device)

if opts.print_model:
    print(RNN)
'''Optimizers'''
import torch.optim as optim

RNN_optim = optim.Adam(RNN.parameters(),
                       lr=opts.lr,
                       betas=(opts.beta1, opts.beta2))
'''Criterion'''
criterion = nn.NLLLoss()
'''run training'''
trainer(opts, RNN, RNN_optim, criterion, train_loader)
'''test'''
tester(opts, RNN, test_loader)
Example #23
def train(word2vec, dataset, parameters, class_weights):
    modeldir = os.path.join(parameters["runs_dir"], parameters["model_name"])
    if not os.path.exists(modeldir):
        os.mkdir(modeldir)
    logdir = os.path.join(modeldir, "log")
    if not os.path.exists(logdir):
        os.mkdir(logdir)
    logdir_train = os.path.join(logdir, "train")
    if not os.path.exists(logdir_train):
        os.mkdir(logdir_train)
    logdir_test = os.path.join(logdir, "test")
    if not os.path.exists(logdir_test):
        os.mkdir(logdir_test)
    # logdir_dev = os.path.join(logdir, "dev")
    # if not os.path.exists(logdir_dev):
    #     os.mkdir(logdir_dev)
    savepath = os.path.join(modeldir, "save")

    #device_string = "/gpu:{}".format(parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    device_string = "/cpu:0"
    with tf.device(device_string):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True,
                                      gpu_options=gpu_options)

        sess = tf.Session(config=config_proto)

        headline_ph = tf.placeholder(tf.float32,
                                     shape=[
                                         parameters["sequence_length"], None,
                                         parameters["embedding_dim"]
                                     ],
                                     name="headline")
        body_ph = tf.placeholder(tf.float32,
                                 shape=[
                                     parameters["sequence_length"], None,
                                     parameters["embedding_dim"]
                                 ],
                                 name="body")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")

        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(
            filter_height=1,
            filter_width=parameters["embedding_dim"],
            in_channels=1,
            out_channels=parameters["num_units"],
            name="projecter")

        optimizer = tf.train.AdamOptimizer(
            learning_rate=parameters["learning_rate"],
            name="ADAM",
            beta1=0.9,
            beta2=0.999)

        with tf.variable_scope(name_or_scope="headline"):
            headline = RNN(cell=LSTMCell,
                           num_units=parameters["num_units"],
                           embedding_dim=parameters["embedding_dim"],
                           projecter=projecter,
                           keep_prob=keep_prob_ph,
                           class_weights=class_weights)
            headline.process(sequence=headline_ph)

        with tf.variable_scope(name_or_scope="body"):
            body = RNN(cell=AttentionLSTMCell,
                       num_units=parameters["num_units"],
                       embedding_dim=parameters["embedding_dim"],
                       hiddens=headline.hiddens,
                       states=headline.states,
                       projecter=projecter,
                       keep_prob=keep_prob_ph,
                       class_weights=class_weights)
            body.process(sequence=body_ph)

        loss, loss_summary, accuracy, accuracy_summary = body.loss(
            targets=targets_ph)

        weight_decay = tf.reduce_sum([
            tf.reduce_sum(parameter)
            for parameter in headline.parameters + body.parameters
        ])

        global_loss = loss + parameters["weight_decay"] * weight_decay

        train_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        train_summary_writer = tf.summary.FileWriter(logdir_train, sess.graph)
        test_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        test_summary_writer = tf.summary.FileWriter(logdir_test)

        saver = tf.train.Saver(max_to_keep=10)
        summary_writer = tf.summary.FileWriter(logdir)
        tf.train.write_graph(sess.graph_def,
                             modeldir,
                             "graph.pb",
                             as_text=False)
        loader = tf.train.Saver(tf.global_variables())

        optimizer = tf.train.AdamOptimizer(
            learning_rate=parameters["learning_rate"],
            name="ADAM",
            beta1=0.9,
            beta2=0.999)
        train_op = optimizer.minimize(global_loss)

        sess.run(tf.global_variables_initializer())

        batcher = Batcher(word2vec=word2vec)
        train_batches = batcher.batch_generator(
            dataset=dataset["train"],
            num_epochs=parameters["num_epochs"],
            batch_size=parameters["batch_size"]["train"],
            sequence_length=parameters["sequence_length"])
        num_step_by_epoch = int(
            math.ceil(
                len(dataset["train"]["targets"]) /
                parameters["batch_size"]["train"]))
        for train_step, (train_batch, epoch) in enumerate(train_batches):
            feed_dict = {
                headline_ph: np.transpose(train_batch["headline"], (1, 0, 2)),
                body_ph: np.transpose(train_batch["body"], (1, 0, 2)),
                targets_ph: train_batch["targets"],
                keep_prob_ph: parameters["keep_prob"],
            }

            _, summary_str, train_loss, train_accuracy = sess.run(
                [train_op, train_summary_op, loss, accuracy],
                feed_dict=feed_dict)
            train_summary_writer.add_summary(summary_str, train_step)
            if train_step % 10 == 0:
                sys.stdout.write(
                    "\rTRAIN | epoch={0}/{1}, step={2}/{3} | loss={4:.2f}, accuracy={5:.2f}%   "
                    .format(epoch + 1, parameters["num_epochs"],
                            train_step % num_step_by_epoch, num_step_by_epoch,
                            train_loss, 100. * train_accuracy))
                sys.stdout.flush()
            if train_step % 500 == 0:
                test_batches = batcher.batch_generator(
                    dataset=dataset["test"],
                    num_epochs=1,
                    batch_size=parameters["batch_size"]["test"],
                    sequence_length=parameters["sequence_length"])
                for test_step, (test_batch, _) in enumerate(test_batches):
                    feed_dict = {
                        headline_ph:
                        np.transpose(test_batch["headline"], (1, 0, 2)),
                        body_ph:
                        np.transpose(test_batch["body"], (1, 0, 2)),
                        targets_ph:
                        test_batch["targets"],
                        keep_prob_ph:
                        1.,
                    }

                    summary_str, test_loss, test_accuracy = sess.run(
                        [test_summary_op, loss, accuracy], feed_dict=feed_dict)
                    print "\nTEST | loss={0:.2f}, accuracy={1:.2f}%   ".format(
                        test_loss, 100. * test_accuracy)
                    print ""
                    test_summary_writer.add_summary(summary_str, train_step)
                    break
            if train_step % 5000 == 0:
                saver.save(sess, save_path=savepath, global_step=train_step)
        print ""
Example #24
keys = yaml.load(open("twitter_keys", "r"))
consumer_key = keys["consumer_key"]
consumer_secret = keys["consumer_secret"]
access_token = keys["access_token"]
access_secret = keys["access_secret"]

handler = OAuthHandler(consumer_key, consumer_secret)
handler.set_access_token(access_token, access_secret)

a, b, c, d, inv_vocab = load_dataset()

mx = len(inv_vocab)

api = API(handler)
rnn = RNN(mx, rnn_size, False)
model = L.Classifier(rnn)
serializers.load_hdf5("mymodel.h5", model)

while True:
    nxt = np.random.randint(0, mx)
    result = ""
    for i in range(40):
        nxt = np.array([nxt], np.int32)
        prob = F.softmax(model.predictor(nxt))
        nxt = np.argmax(prob.data)
        s = inv_vocab[nxt]
        if s == "。":
            break
        result += s