Example #1
def evaluate_train():
    # Load model
    weight_path = 'model/09031925_epoch_0_train_loss_5.9855.h5'

    # Load data
    Sources, Targets = load_train_data()
    de2idx, idx2de = load_de_vocab()
    en2idx, idx2en = load_en_vocab()
    batch_size = 5

    model = TransformerModel(in_vocab_len=len(idx2de),
                             out_vocab_len=len(idx2en),
                             max_len=hp.maxlen)
    model.load_model(weight_path)

    # With batch_size = 5 this loop runs once, evaluating a single batch of 5 samples.
    for i in range(5 // batch_size):
        x = Sources[i * batch_size:(i + 1) * batch_size]  # only used by the commented-out translate() call below
        sources = Sources[i * batch_size:(i + 1) * batch_size]
        targets = Targets[i * batch_size:(i + 1) * batch_size]

        preds = model.translate_with_ans(sources, targets, idx2en)
        # preds = model.translate(x, idx2en)

        for source, target, pred in zip(sources, targets, preds):
            print('source:', ' '.join(idx2de[idx] for idx in source))
            print('expected:', ' '.join(idx2en[idx] for idx in target))
            print('pred:', pred)
            print()
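Every snippet on this page relies on project-specific helpers such as load_train_data, load_de_vocab and load_en_vocab whose definitions are not shown. As a rough, hypothetical sketch (the real implementations differ per project), Example #1 assumes the vocab loaders return token-to-index and index-to-token dicts, and that load_train_data returns zero-padded index matrices for source and target sentences:

import numpy as np

# Hypothetical sketch of the helpers assumed by Example #1 (not the real code).
def load_vocab_sketch(vocab_file):
    # Each vocab line is assumed to start with the token, e.g. "<token>\t<count>".
    tokens = ['<pad>', '<unk>', '<s>', '</s>']
    with open(vocab_file, encoding='utf-8') as f:
        tokens += [line.split('\t')[0] for line in f]
    token2idx = {tok: i for i, tok in enumerate(tokens)}
    idx2token = {i: tok for i, tok in enumerate(tokens)}
    return token2idx, idx2token

def load_train_data_sketch(source_ids, target_ids, maxlen=50):
    # source_ids/target_ids: lists of lists of token indices, one per sentence.
    def pad(seqs):
        out = np.zeros((len(seqs), maxlen), dtype=np.int64)
        for i, seq in enumerate(seqs):
            out[i, :min(len(seq), maxlen)] = seq[:maxlen]
        return out
    return pad(source_ids), pad(target_ids)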
Example #2
def train():
    current_batches = 0
    de2idx, idx2de = load_de_vocab()
    en2idx, idx2en = load_en_vocab()
    enc_voc = len(de2idx)
    dec_voc = len(en2idx)
    writer = SummaryWriter()
    # Load data
    X, Y = load_train_data()
    # calc total batch count
    num_batch = len(X) // hp.batch_size
    model = AttModel(hp, enc_voc, dec_voc)
    model.train()
    model.cuda()
    torch.backends.cudnn.benchmark = True
    if not os.path.exists(hp.model_dir):
        os.makedirs(hp.model_dir)
    if hp.preload is not None and os.path.exists(hp.model_dir + '/history.pkl'):
        with open(hp.model_dir + '/history.pkl', 'rb') as in_file:
            history = pickle.load(in_file)
    else:
        history = {'current_batches': 0}
    current_batches = history['current_batches']
    optimizer = optim.Adam(model.parameters(), lr=hp.lr, betas=[0.9, 0.98], eps=1e-8)
    if hp.preload is not None and os.path.exists(hp.model_dir + '/optimizer.pth'):
        optimizer.load_state_dict(torch.load(hp.model_dir + '/optimizer.pth'))
    if hp.preload is not None and os.path.exists(hp.model_dir + '/model_epoch_%02d.pth' % hp.preload):
        model.load_state_dict(torch.load(hp.model_dir + '/model_epoch_%02d.pth' % hp.preload))

    startepoch = int(hp.preload) if hp.preload is not None else 1
    for epoch in range(startepoch, hp.num_epochs + 1):
        current_batch = 0
        for index, current_index in get_batch_indices(len(X), hp.batch_size):
            tic = time.time()
            x_batch = Variable(torch.LongTensor(X[index]).cuda())
            y_batch = Variable(torch.LongTensor(Y[index]).cuda())
            toc = time.time()
            tic_r = time.time()
            torch.cuda.synchronize()
            optimizer.zero_grad()
            loss, _, acc = model(x_batch, y_batch)
            loss.backward()
            optimizer.step()
            torch.cuda.synchronize()
            toc_r = time.time()
            current_batches += 1
            current_batch += 1
            if current_batches % 10 == 0:
                writer.add_scalar('./loss', loss.data.cpu().numpy(), current_batches)
                writer.add_scalar('./acc', acc.data.cpu().numpy(), current_batches)
            if current_batches % 5 == 0:
                print('epoch %d, batch %d/%d, loss %f, acc %f' % (epoch, current_batch, num_batch, loss.item(), acc.item()))
                print('batch loading used time %f, model forward used time %f' % (toc - tic, toc_r - tic_r))
            if current_batches % 100 == 0:
                writer.export_scalars_to_json(hp.model_dir + '/all_scalars.json')
    history['current_batches'] = current_batches  # keep the resume counter in sync before saving
    with open(hp.model_dir + '/history.pkl', 'wb') as out_file:
        pickle.dump(history, out_file)
        checkpoint_path = hp.model_dir + '/model_epoch_%02d' % epoch + '.pth'
        torch.save(model.state_dict(), checkpoint_path)
        torch.save(optimizer.state_dict(), hp.model_dir + '/optimizer.pth')
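Example #2 iterates with a get_batch_indices helper that is not shown. A minimal sketch of what such a generator could look like, assuming it yields (row-index list, offset) pairs over a shuffled dataset so that X[indices] selects one batch:

import random

# Hypothetical sketch of get_batch_indices as used in Example #2.
def get_batch_indices(total_length, batch_size):
    indices = list(range(total_length))
    random.shuffle(indices)
    current_index = 0
    while current_index + batch_size <= total_length:
        yield indices[current_index:current_index + batch_size], current_index
        current_index += batch_size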
Example #3
    def __init__(self, transformerModel, output_dir):
        self.transformerModel = transformerModel
        self.output_dir = output_dir
        self.Sources, self.Targets = load_train_data()
        _, self.idx2de = load_de_vocab()
        _, self.idx2en = load_en_vocab()

        os.makedirs(self.output_dir, exist_ok=True)
Example #4
def train():
    print('-' * 30)
    print('Loading and preprocessing train data...')
    print('-' * 30)

    # Prepare train and validation data

    imgs_scans_train, imgs_mask_train = load_train_data()
    imgs_scans_valid, imgs_mask_valid = load_validation_data()
    imgs_mask_train = imgs_mask_train.astype('float32')
    imgs_scans_train = imgs_scans_train.astype('float32')
    imgs_mask_train /= 255.  # scale masks to [0, 1]
    imgs_scans_train /= 128.  # scale input images to [0, 2]
    imgs_scans_train = imgs_scans_train - 1.  # scale input images to [-1, 1]

    imgs_scans_valid = imgs_scans_valid.astype('float32')
    imgs_mask_valid = imgs_mask_valid.astype('float32')
    imgs_mask_valid /= 255.  # scale masks to [0, 1]
    imgs_scans_valid /= 128.  # scale input images to [0, 2]
    imgs_scans_valid = imgs_scans_valid - 1.  # scale input images to [-1, 1]

    imgs_scans_train = np.repeat(
        imgs_scans_train, 3,
        axis=4)  # repeat three times before multi modality loading
    imgs_scans_valid = np.repeat(
        imgs_scans_valid, 3,
        axis=4)  # repeat three times before multi modality loading

    print('-' * 30)
    print('Creating and compiling model...')
    print('-' * 30)

    model = Unet_vgg.res_unet_vgg(image_depth=img_depth,
                                  image_rows=img_rows,
                                  image_cols=img_cols,
                                  train_encoder=False)

    model.compile(optimizer=Adam(lr=5e-5,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 epsilon=1e-08,
                                 decay=0.000000199),
                  loss=segmentation_loss,
                  metrics=['accuracy', dice_coef])

    model.summary()

    weight_dir = 'weights'
    if not os.path.exists(weight_dir):
        os.mkdir(weight_dir)
    model_checkpoint = ModelCheckpoint(os.path.join(weight_dir,
                                                    project_name + '.h5'),
                                       monitor='val_dice_coef',
                                       save_best_only=True,
                                       mode='max')

    # Load planar 3D encoder
    model.load_weights(os.path.join(weight_dir, 'planar_3d_vgg.h5'),
                       by_name=True)

    log_dir = 'logs'
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    csv_logger = CSVLogger(os.path.join(log_dir, project_name + '.txt'),
                           separator=',',
                           append=False)

    print('-' * 30)
    print('Fitting model...')
    print('-' * 30)

    model.fit(
        x=imgs_scans_train,
        y=imgs_mask_train,
        batch_size=3,
        epochs=100,
        callbacks=[model_checkpoint, csv_logger],
        shuffle=True,
        validation_data=(imgs_scans_valid, imgs_mask_valid),
    )

    print('-' * 30)
    print('Training finished')
    print('-' * 30)
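The preprocessing in Example #4 maps 8-bit masks to [0, 1] and 8-bit scans to roughly [-1, 1]. A quick standalone check of that arithmetic, assuming uint8 inputs in the range 0..255:

import numpy as np

# 0 -> -1.0, 128 -> 0.0, 255 -> ~0.99 after the scan scaling;
# masks are simply divided by 255 to land in [0, 1].
scan = np.array([0, 128, 255], dtype='float32')
mask = np.array([0, 255], dtype='float32')
print(scan / 128. - 1.)   # [-1.  0.  0.9921875]
print(mask / 255.)        # [0. 1.]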
Example #5
if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument("--batch_size", type=int, default=256)
    parser.add_argument("--dim_model", type=int, default=128)
    parser.add_argument("--num_iterations", type=int, default=100000)
    parser.add_argument("--vocab_path", type=str, default="./corpora/vocab.txt")

    args = parser.parse_args()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    with open(args.vocab_path, "rt") as f:
        n_vocab = len(f.readlines())
    X, class_weights = load_train_data("./data")
    n_class = len(class_weights)
    model = make_model(n_vocab, d_model=args.dim_model, batch_size=args.batch_size, n_class=n_class)
    model = model.to(device)
    print(model)
    # epochs = 10
    for iter in range(args.num_iterations):
        x, y = next_batch(X, args.batch_size, args.dim_model)
    # batches = get_batches(in_text, out_text, 10, 200)
    # for x, y in batches:
        x = torch.tensor(x, dtype=torch.float, device=device)
        y = torch.tensor(y, dtype=torch.long, device=device)
        y = torch.squeeze(y)
        criterion, optimizer = get_criterion(model)
        optimizer.zero_grad()
        output = model(x, None)
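Example #5 is cut off after the forward pass and relies on a next_batch helper that is not shown. A purely hypothetical sketch of such a sampler, assuming each row of X packs dim_model input values followed by a single integer class label:

import numpy as np

# Hypothetical sketch of next_batch as used in Example #5 (not the real helper):
# draws batch_size random rows from X and splits them into inputs and labels.
def next_batch(X, batch_size, dim_model):
    X = np.asarray(X)
    rows = np.random.randint(0, len(X), size=batch_size)
    batch = X[rows]
    x = batch[:, :dim_model].astype('float32')              # model inputs
    y = batch[:, dim_model:dim_model + 1].astype('int64')   # class labels
    return x, y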
Example #6
    parser.add_argument('--num_batches', type=int, default=30000)
    parser.add_argument('--save_every', type=int, default=3000)
    parser.add_argument('--maxlen', type=int, default=128)
    parser.add_argument('--weight_tying', type=int, default=0)
    parser.add_argument('--hidden_units', type=int, default=256)
    parser.add_argument('--weighted_loss', type=int, default=1)
    parser.add_argument('--dropout_rate', type=float, default=0.4)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--num_blocks', type=int, default=1)
    parser.add_argument('--num_heads', type=int, default=8)
    parser.add_argument('--num_epochs', type=int, default=9)
    parser.add_argument('--sinusoid', type=int, default=0)

    args = parser.parse_args()
    word2idx, idx2word = load_vocab(args.vocab_path)
    X, class_weights = load_train_data(args.train_path)
    args.vocab_size = len(word2idx)
    if args.weighted_loss:
        args.class_weights = class_weights
    with open(os.path.join(args.logdir, "args.pkl"), 'wb') as f:
        pickle.dump(args, f)

    # Construct graph
    model = TransformerDecoder(is_training=True, args=args)
    print("Graph loaded")

    # Start session
    with tf.Session(graph=model.graph) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.ckpt_path)
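Example #6 pickles the parsed argparse namespace to args.pkl so a later run can rebuild the same configuration. A minimal sketch of the matching load side (the helper name is hypothetical):

import os
import pickle

# Hypothetical counterpart to the args.pkl dump in Example #6: restore the
# training configuration (vocab_size, class_weights, ...) for inference.
def load_args(logdir):
    with open(os.path.join(logdir, "args.pkl"), 'rb') as f:
        return pickle.load(f)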
Example #7
def train():
    print("Graph loading......Model name:{}".format(hp.modelname))
    g = Graph()
    print("Data loading...")
    _, eng_names, _, kor_names = load_train_data()
    _, val_eng_names, _, val_kor_names = load_evaluate_data(
        eval_mode="validate")

    early_stopping_count = 0
    data_list = list(range(len(eng_names)))
    with g.graph.as_default():
        sv = tf.train.Saver()
        with tf.Session() as sess:
            # Initialize
            sess.run(tf.global_variables_initializer())
            best_valid_loss = 100000.
            for epoch in range(1, hp.num_epochs + 1):
                np.random.shuffle(data_list)
                # # Attention Plot per epochs
                # al = sess.run(g.alignments, {g.x: eng_names[data_list][:1],
                #                              g.y: kor_names[data_list][:1]})
                # plot_alignment(al[0], epoch - 1, eng_names[data_list][:1], kor_names[data_list][:1])
                # Train
                train_loss = 0
                num_batch = len(eng_names) // hp.batch_size  # integer division so range() gets an int
                for step in tqdm(range(num_batch),
                                 total=num_batch,
                                 ncols=70,
                                 leave=False,
                                 unit='b'):
                    name_ids = data_list[step *
                                         hp.batch_size:step * hp.batch_size +
                                         hp.batch_size]
                    loss, gs = sess.run([g.train_op, g.global_step], {
                        g.x: eng_names[name_ids],
                        g.y: kor_names[name_ids]
                    })
                    train_loss += loss
                    if step % 20 == 0:
                        print('\t step:{} train_loss:{:.3f}'.format(gs, loss))
                train_loss /= num_batch

                # Validation
                valid_loss = 0.
                for idx in range(0, len(val_eng_names), hp.batch_size):
                    v_loss = sess.run(
                        g.mean_loss, {
                            g.x: val_eng_names[idx:idx + hp.batch_size],
                            g.y: val_kor_names[idx:idx + hp.batch_size]
                        })
                    valid_loss += v_loss
                valid_loss /= len(val_eng_names) / hp.batch_size
                print(
                    "[epoch{}] train_loss={:.3f} validate_loss={:.3f} ".format(
                        epoch, train_loss, valid_loss))
                # Stopping
                if valid_loss <= best_valid_loss * 0.999:
                    best_valid_loss = valid_loss
                    sv.save(sess, "logdir/" + hp.modelname + '/model.ckpt')
                else:
                    if hp.is_earlystopping:
                        early_stopping_count += 1
                        if early_stopping_count == 3:
                            print("Early Stopping...")
                            break
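The 0.999 factor in Example #7's checkpoint condition means a model is saved only when validation loss improves by at least 0.1% relative to the best value seen so far. The same rule restated as a standalone helper:

# Restatement of Example #7's checkpoint condition: valid_loss must undercut
# the best loss by a 0.1% relative margin (best * 0.999) to count as improved.
def improved(valid_loss, best_valid_loss, min_rel_improvement=1e-3):
    return valid_loss <= best_valid_loss * (1.0 - min_rel_improvement)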
Example #8
            if cleared not in ref_:
                ref_.append(cleared)
            ref.append(ref_.index(cleared))

        ref = np.asarray(ref).astype(float)
        args.target_vocab_size = len(word2idx)
        args.vocab_size = ref.shape[0]
    else:
        args.vocab_size = len(word2idx)

    with open(os.path.join(args.logdir, "args.pkl"), 'wb') as f:
        pickle.dump(args, f)
    # Construct graph
    model = TransformerDecoder(is_training=True, args=args)
    print("Graph loaded")
    X = load_train_data(args.train_path, args.vocab_path, args.maxlen)
    pad_idx = word2idx["<pad>"]
    num_batch = len(X) // args.batch_size

    # Start session
    with tf.Session(graph=model.graph) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.ckpt_path)
        if ckpt:
            print("restoring from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)

        for epoch in range(1, args.num_epochs + 1):
            gs = sess.run(model.global_step)
            for step in range(num_batch - 1):
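The snippet above starts mid-loop, so the dedup-and-index pattern at its top is easy to miss. A standalone restatement of that pattern with illustrative input values:

# Every distinct cleared value gets one slot in ref_, and ref records that slot.
ref_, ref = [], []
for cleared in ['dog', 'cat', 'dog']:
    if cleared not in ref_:
        ref_.append(cleared)
    ref.append(ref_.index(cleared))
# ref_ == ['dog', 'cat'], ref == [0, 1, 0]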
Example #9
    fig.colorbar(im, ax=ax)
    plt.xlabel('Decoder timestep')
    plt.ylabel('Encoder timestep')
    plt.savefig(hp.logdir + '/alignment_%d' % gs, format='png')


if __name__ == '__main__':
    # Load vocabulary
    de2idx, idx2de = load_de_vocab()
    en2idx, idx2en = load_en_vocab()

    # Construct graph
    g = Graph("train")
    print("Graph loaded")

    X, Y = load_train_data()

    # calc total batch count
    num_batch = len(X) // hp.batch_size
    print(X.shape)
    g.num_batch = num_batch
    # Start session
    sv = tf.train.Supervisor(graph=g.graph,
                             logdir=hp.logdir,
                             summary_op=None,
                             save_model_secs=0)
    with sv.managed_session() as sess:
        i = 0
        for epoch in range(1, hp.num_epochs + 1):
            if sv.should_stop(): break
            for step in tqdm(range(g.num_batch),
Example #10
parser.add_argument("mode", help="train or eval")
args = parser.parse_args()

en2idx, idx2en = load_de_en_vocab('processed-data/en.vocab.tsv')
de2idx, idx2de = load_de_en_vocab('processed-data/zh.vocab.tsv')
print("读取en,zh字典")

# load train data
en_npy_path = "./processed-data/train_en.npy"
zh_npy_path = "./processed-data/train_zh.npy"
if os.path.exists(en_npy_path) and os.path.exists(zh_npy_path):
    print("load training data")
    X = np.load(en_npy_path)
    Y = np.load(zh_npy_path)
else:
    X, Y = load_train_data(de2idx, en2idx)
    np.save(en_npy_path, X)
    np.save(zh_npy_path, Y)

# load test data
test_en_path = "./processed-data/test_en.npy"
test_s_path = "./processed-data/t_source.npy"
test_t_path = "./processed-data/t_target.npy"
if os.path.exists(test_en_path) and os.path.exists(
        test_s_path) and os.path.exists(test_t_path):
    print("load testing data")
    X_test = np.load(test_en_path)
    Source_test = np.load(test_s_path)
    Target_test = np.load(test_t_path)
else:
    X_test, Source_test, Target_test = load_test_data(de2idx, en2idx)
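Example #10's load-or-build pattern for cached .npy files generalizes to a small helper; a sketch for the single-array case (hypothetical helper name):

import os
import numpy as np

# Hypothetical helper capturing the caching pattern in Example #10: reuse a
# saved .npy file if it exists, otherwise build the array once with build_fn()
# and save it for the next run.
def load_or_build(npy_path, build_fn):
    if os.path.exists(npy_path):
        return np.load(npy_path)
    arr = build_fn()
    np.save(npy_path, arr)
    return arr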