Example 1
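# Picks a model by step number (random, prior, supervised, embedding, or a GRU-backed
# NeuralModel), fits it on the training candidates, then evaluates predictions on every dataset.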
def run_step(step_num):
    if step_num == 0:
        model = models.RandomModel()
    elif step_num == 1:
        model = models.PriorModel()
    elif step_num == 2:
        model = models.SupModel()
    elif step_num == 3:
        print('Training will take 6-7 minutes')
        model = models.EmbedModel()
    elif step_num == 4:
        print('Training will take 30-40 minutes')
        gru = models.GRU()
        model = models.NeuralModel(gru)
    else:
        raise ValueError('Invalid step number')
    trainset = Dataset.get('train')
    num_train_candidates = Candidate.get_count()
    model.fit(trainset, num_train_candidates)
    print('Training finished!')

    for dsname in Dataset.ds2path.keys():
        ds = Dataset.get(dsname)
        pred_cids = model.predict(ds)
        print(dsname, ds.eval(pred_cids))
Example 2
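# Builds a Chainer RNN/LSTM/GRU language model from a config file and restores its
# trained weights with serializers.load_npz.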
def load_model(path_model, path_config, vocab):
    config = Config(path_config)
    model_name = config.getstr("model")
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")

    if model_name == "rnn":
        model = models.RNN(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=None,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "lstm":
        model = models.LSTM(vocab_size=len(vocab),
                            word_dim=word_dim,
                            state_dim=state_dim,
                            initialW=None,
                            EOS_ID=vocab["<EOS>"])
    elif model_name == "gru":
        model = models.GRU(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=None,
                           EOS_ID=vocab["<EOS>"])
    else:
        print "[error] Unkwown model name: %s" % model_name
        sys.exit(-1)
    serializers.load_npz(path_model, model)
    return model
Example 3
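# Loads a pre-trained GRU checkpoint with PyTorch, wraps it in a NeuralModel,
# and evaluates its predictions on the 'ace' dataset.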
def eval_pre_trained():
    model = models.GRU()
    model.load_state_dict(torch.load('../../assign2.model'))
    model.eval()
    net = models.NeuralModel(model)
    ds = Dataset.get('ace')
    pred_cids = net.predict(ds)
    print('ace', ds.eval(pred_cids))
Example 4
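# Model factory: returns a GCN, GRU, or TGCN instance depending on the parsed
# arguments and the data module's adjacency matrix.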
def get_model(args, dm):
    model = None
    if args.model_name == "GCN":
        model = models.GCN(adj=dm.adj, input_dim=args.seq_len, output_dim=args.hidden_dim)
    if args.model_name == "GRU":
        model = models.GRU(input_dim=dm.adj.shape[0], hidden_dim=args.hidden_dim)
    if args.model_name == "TGCN":
        model = models.TGCN(adj=dm.adj, hidden_dim=args.hidden_dim)
    return model
Example 5
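# Variant of the same factory that additionally passes a loss argument to TGCN.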
def get_model(args, dm):
    model = None
    if args.model_name == 'GCN':
        model = models.GCN(adj=dm.adj,
                           input_dim=args.seq_len,
                           output_dim=args.hidden_dim)
    if args.model_name == 'GRU':
        model = models.GRU(input_dim=dm.adj.shape[0],
                           hidden_dim=args.hidden_dim)
    if args.model_name == 'TGCN':
        model = models.TGCN(adj=dm.adj,
                            hidden_dim=args.hidden_dim,
                            loss=args.loss)
    return model
Example 6
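# Excerpt from a Theano training script: either resumes a previously saved GRU
# punctuation model (restoring its optimizer state) or builds a fresh GRU with new
# gradient accumulators, then defines the L2-regularized cost and its gradients.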
    if continue_with_previous:
        print "Loading previous model state" 

        net, state = models.load(model_file, MINIBATCH_SIZE, x)
        gsums, learning_rate, validation_ppl_history, starting_epoch, rng = state
        best_ppl = min(validation_ppl_history)

    else:
        rng = np.random
        rng.seed(1)

        print "Building model..."
        net = models.GRU(
            rng=rng,
            x=x,
            minibatch_size=MINIBATCH_SIZE,
            n_hidden=num_hidden,
            x_vocabulary=word_vocabulary,
            y_vocabulary=punctuation_vocabulary
            )

        starting_epoch = 0
        best_ppl = np.inf
        validation_ppl_history = []
        
        gsums = [theano.shared(np.zeros_like(param.get_value(borrow=True))) for param in net.params]

    cost = net.cost(y) + L2_REG * net.L2_sqr

    gparams = T.grad(cost, net.params)
    updates = OrderedDict()
Example 7
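# Excerpt: caches a tensor to disk inside a helper, then initializes a Generator and
# a motion GRU on the GPU and samples a latent content code before the training loop.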
        fc_path = os.path.join(fc_path, 'eps.pth')
        torch.save(eps, fc_path)
    else:
        fc_path = os.path.join(fc_path, 'eps.pth')

    return torch.load(fc_path), fc_path


fc_dir = './fc_dir/'

## Initialize Generator, RNN, and latent codes

generator = models.Generator(ngpu, z_dim, ngf, ndf, nc)
generator = generator.cuda()

gru = models.GRU(dim_z_motion, 500, gpu=True)
gru.initWeight()
gru = gru.cuda()

z_c = utils.sample_z_content(dim_z_content)

## Start training

for ep in range(num_epoch):

    # Randomly shuffle the data folders
    np.random.shuffle(data_folders)

    train_tqdm = tqdm(range(100))
    psnr = 0.0
Example 8
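# Chainer RNNLM training script: builds an RNN, LSTM, GRU, or bidirectional LSTM
# (optionally with pre-trained word embeddings), trains it with SMORMS3 plus gradient
# clipping and weight decay, and periodically evaluates and saves snapshots.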
def main(gpu, path_corpus, path_config, path_word2vec):
    MAX_EPOCH = 50
    EVAL = 200
    MAX_LENGTH = 70

    config = utils.Config(path_config)
    model_name = config.getstr("model")
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")
    grad_clip = config.getfloat("grad_clip")
    weight_decay = config.getfloat("weight_decay")
    batch_size = config.getint("batch_size")

    print "[info] CORPUS: %s" % path_corpus
    print "[info] CONFIG: %s" % path_config
    print "[info] PRE-TRAINED WORD EMBEDDINGS: %s" % path_word2vec
    print "[info] MODEL: %s" % model_name
    print "[info] WORD DIM: %d" % word_dim
    print "[info] STATE DIM: %d" % state_dim
    print "[info] GRADIENT CLIPPING: %f" % grad_clip
    print "[info] WEIGHT DECAY: %f" % weight_decay
    print "[info] BATCH SIZE: %d" % batch_size

    path_save_head = os.path.join(
        config.getpath("snapshot"),
        "rnnlm.%s.%s" % (os.path.basename(path_corpus),
                         os.path.splitext(os.path.basename(path_config))[0]))
    print "[info] SNAPSHOT: %s" % path_save_head

    sents_train, sents_val, vocab, ivocab = \
            utils.load_corpus(path_corpus=path_corpus, max_length=MAX_LENGTH)

    if path_word2vec is not None:
        word2vec = utils.load_word2vec(path_word2vec, word_dim)
        initialW = utils.create_word_embeddings(vocab,
                                                word2vec,
                                                dim=word_dim,
                                                scale=0.001)
    else:
        initialW = None

    cuda.get_device(gpu).use()
    if model_name == "rnn":
        model = models.RNN(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=initialW,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "lstm":
        model = models.LSTM(vocab_size=len(vocab),
                            word_dim=word_dim,
                            state_dim=state_dim,
                            initialW=initialW,
                            EOS_ID=vocab["<EOS>"])
    elif model_name == "gru":
        model = models.GRU(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=initialW,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "bd_lstm":
        model = models.BD_LSTM(vocab_size=len(vocab),
                               word_dim=word_dim,
                               state_dim=state_dim,
                               initialW=initialW,
                               EOS_ID=vocab["<EOS>"])
    else:
        print "[error] Unknown model name: %s" % model_name
        sys.exit(-1)
    model.to_gpu(gpu)

    opt = optimizers.SMORMS3()
    opt.setup(model)
    opt.add_hook(chainer.optimizer.GradientClipping(grad_clip))
    opt.add_hook(chainer.optimizer.WeightDecay(weight_decay))

    print "[info] Evaluating on the validation sentences ..."
    loss_data, acc_data = evaluate(model, model_name, sents_val, ivocab)
    perp = math.exp(loss_data)
    print "[validation] iter=0, epoch=0, perplexity=%f, accuracy=%.2f%%" \
        % (perp, acc_data*100)

    it = 0
    n_train = len(sents_train)
    vocab_size = model.vocab_size
    for epoch in xrange(1, MAX_EPOCH + 1):
        perm = np.random.permutation(n_train)
        for data_i in xrange(0, n_train, batch_size):
            if data_i + batch_size > n_train:
                break
            words = sents_train[perm[data_i:data_i + batch_size]]

            if model_name == "bd_lstm":
                xs, ms = utils.make_batch(words,
                                          train=True,
                                          tail=False,
                                          mask=True)
                ys = model.forward(xs=xs, ms=ms, train=True)
            else:
                xs = utils.make_batch(words, train=True, tail=False)
                ys = model.forward(ts=xs, train=True)

            ys = F.concat(ys, axis=0)
            ts = F.concat(xs, axis=0)
            ys = F.reshape(ys, (-1, vocab_size))  # (TN, |V|)
            ts = F.reshape(ts, (-1, ))  # (TN,)

            loss = F.softmax_cross_entropy(ys, ts)
            acc = F.accuracy(ys, ts, ignore_label=-1)

            model.zerograds()
            loss.backward()
            loss.unchain_backward()
            opt.update()
            it += 1

            loss_data = float(cuda.to_cpu(loss.data))
            perp = math.exp(loss_data)
            acc_data = float(cuda.to_cpu(acc.data))
            print "[training] iter=%d, epoch=%d (%d/%d=%.03f%%), perplexity=%f, accuracy=%.2f%%" \
                    % (it, epoch, data_i+batch_size, n_train,
                        float(data_i+batch_size)/n_train*100,
                        perp, acc_data*100)

            if it % EVAL == 0:
                print "[info] Evaluating on the validation sentences ..."
                loss_data, acc_data = evaluate(model, model_name, sents_val,
                                               ivocab)
                perp = math.exp(loss_data)
                print "[validation] iter=%d, epoch=%d, perplexity=%f, accuracy=%.2f%%" \
                        % (it, epoch, perp, acc_data*100)

                serializers.save_npz(
                    path_save_head + ".iter_%d.epoch_%d.model" % (it, epoch),
                    model)
                utils.save_word2vec(
                    path_save_head + ".iter_%d.epoch_%d.vectors.txt" %
                    (it, epoch), utils.extract_word2vec(model, vocab))
                print "[info] Saved."

    print "[info] Done."
Example 9
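    # Excerpt from a TensorFlow port of the punctuation model: builds a GRU from a
    # placeholder-like input array, sets up an Adagrad optimizer, and enters the epoch loop.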
        sys.exit("'Learning rate' argument missing!")

    model_file_name = "Model_%s_h%d_lr%s.pcl" % (model_name, num_hidden,
                                                 learning_rate)

    print(num_hidden, learning_rate, model_file_name)

    rng = np.random
    rng.seed(1)

    print("Building model ...")
    vocab_len = len(data.read_vocabulary(data.WORD_VOCAB_FILE))
    x_len = vocab_len if vocab_len < data.MAX_WORD_VOCABULARY_SIZE else data.MAX_WORD_VOCABULARY_SIZE + data.MIN_WORD_COUNT_IN_VOCAB
    x = np.ones((x_len, MINIBATCH_SIZE)).astype(int)
    # Initialize the model weights without any real data, comparable to placeholders in earlier TensorFlow versions
    net = models.GRU(rng, x, num_hidden)
    optimizer = tf.keras.optimizers.Adagrad(learning_rate=learning_rate,
                                            initial_accumulator_value=1e-6)

    starting_epoch = 0
    best_ppl = np.inf
    validation_ppl_history = []

    print(
        f"Total number of trainable parameters: {sum(np.prod([dim for dim in param.get_shape()]) for param in net.params)}"
    )

    print("Training...")
    for epoch in range(starting_epoch, MAX_EPOCHS):
        t0 = time()
        total_neg_log_likelihood = 0
Example 10
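# PyTorch setup: wraps each training dataset and the validation set in DataLoaders,
# picks an LSTM, GRU, or RNN model from the arguments, and prepares MSE loss with Adam.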
train_loaders = list()
for d in train_datasets:
    temp = DataLoader(dataset=d, batch_size=args.batch_size, shuffle=True)
    train_loaders.append(temp)

# train_loader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True)
validation_loader = DataLoader(dataset=validation_dataset,
                               batch_size=args.batch_size,
                               shuffle=False)

# Model, loss, and optimizer
if args.model == 'lstm':
    model = models.LSTM(args.input_size, args.hidden_size, args.num_layers,
                        args.num_classes, args.noise_std).to(device)
elif args.model == 'gru':
    model = models.GRU(args.input_size, args.hidden_size, args.num_layers,
                       args.num_classes, args.noise_std).to(device)
elif args.model == 'rnn':
    model = models.RNN(args.input_size, args.hidden_size, args.num_layers,
                       args.num_classes, args.noise_std).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

# Train the model
# total_step = len(train_loader)
# total_train_step = len(train_loader)
# total_val_step = len(validation_loader)


def evaluate(dataloader):
    total_loss = 0.0
Example 11
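    # Excerpt: loads the best saved parameters for the selected language model (the
    # visible branch builds a GRU), switches it to eval mode, and draws random starting
    # words to seed sample generation.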
                           batch_size=10,
                           vocab_size=10000,
                           num_layers=2,
                           dp_keep_prob=0.35)

    elif (model_type == 'GRU'):

        #Generate samples using GRU
        dir = 'models/gru/best_params.pt'

        print("GRU model loaded.")

        model = models.GRU(emb_size=200,
                           hidden_size=1500,
                           seq_len=35,
                           batch_size=10,
                           vocab_size=10000,
                           num_layers=2,
                           dp_keep_prob=0.35)

    model.load_state_dict(torch.load(dir))

    #To remove the dropout
    model.eval()

    #Size of vocabulary
    vocab = 10000

    #Sample of size batch_size from the vocab using a uniform distribution
    #Take a random word as input to create the samples
    inp = np.random.choice(vocab, size=10, replace=True, p=None)
Example 12
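    # Excerpt: logs results and saves the learning curves, then (when run as a script)
    # builds a GRU and generates sequences from the PTB validation distribution.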
            f_.write(log_str + '\n')

    # SAVE LEARNING CURVES
    lc_path = os.path.join(save_dir, 'learning_curves.npy')
    print('\nDONE\n\nSaving learning curves to ' + lc_path)
    np.save(
        lc_path, {
            'train_ppls': train_ppls,
            'val_ppls': val_ppls,
            'train_losses': train_losses,
            'val_losses': val_losses,
            'times': times
        })


if __name__ == '__main__':

    gru = models.GRU(batch_size=20,
                     seq_len=35,
                     hidden_size=1500,
                     num_layers=2,
                     vocab_size=10000,
                     dp_keep_prob=.35,
                     emb_size=200)

    valid_data, word_to_id, id_2_word = ptb_valid_data(data_path='data/')

    tokens = get_distribution(valid_data, 20, 35)

    generate_sequence(gru, word_to_id, id_2_word, tokens, sequence_length=35)