Example #1
def main():
    args = sys.argv

    batch_size = 128
    epochs = 100
    maxlen = 300
    model_path = 'models/cnn_model.h5'
    num_words = 40000
    num_label = 2

    x, y = load_dataset('data/amazon_reviews_multilingual_JP_v1_00.tsv')

    x = preprocess_dataset(x)
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=42)

    vocab = build_vocabulary(x_train, num_words)
    x_train = vocab.texts_to_sequences(x_train)
    x_test = vocab.texts_to_sequences(x_test)
    x_train = pad_sequences(x_train, maxlen=maxlen, truncating='post')
    x_test = pad_sequences(x_test, maxlen=maxlen, truncating='post')

    # sys.argv[0] is the script name; the flag ('t' to use pretrained embeddings) is the first argument
    emb_flg = args[1] if len(args) > 1 else 'f'
    if emb_flg == 't':
        wv = load_fasttext('../chap08/models/cc.ja.300.vec.gz')
        wv = filter_embeddings(wv, vocab.word_index, num_words)
    else:
        wv = None

    model = CNNModel(num_words, num_label, embeddings=wv).build()
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])

    callbacks = [
        EarlyStopping(patience=3),
        ModelCheckpoint(model_path, save_best_only=True)
    ]

    model.fit(x=x_train,
              y=y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=0.2,
              callbacks=callbacks,
              shuffle=True)

    model = load_model(model_path)
    api = InferenceAPI(model, vocab, preprocess_dataset)
    y_pred = api.predict_from_sequences(x_test)

    print('precision: {:.4f}'.format(
        precision_score(y_test, y_pred, average='binary')))
    print('recall   : {:.4f}'.format(
        recall_score(y_test, y_pred, average='binary')))
    print('f1       : {:.4f}'.format(f1_score(y_test, y_pred, average='binary')))
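
Note: helpers such as `load_dataset`, `build_vocabulary`, `load_fasttext` and `filter_embeddings` are defined elsewhere in these projects. As a rough guide to what the two embedding helpers do in this and the following Keras examples, a minimal sketch (assuming gensim's KeyedVectors and a Keras-style `word_index`; the names and the 300-dimension default are assumptions, not the projects' actual code) could look like:

import numpy as np
from gensim.models import KeyedVectors


def load_fasttext(path):
    # Load pretrained fastText vectors stored in word2vec text format (.vec, optionally gzipped).
    return KeyedVectors.load_word2vec_format(path, binary=False)


def filter_embeddings(wv, word_index, num_words, dim=300):
    # Keep only the vectors for the top `num_words` vocabulary entries; rows for
    # out-of-vocabulary or filtered words stay zero.
    embeddings = np.zeros((num_words, dim))
    for word, idx in word_index.items():
        if idx >= num_words or word not in wv:
            continue
        embeddings[idx] = wv[word]
    return embeddings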
Example #2
def main():
    batch_size = 128
    epochs = 100
    maxlen = 300
    model_path = "cnn_model.h5"
    num_words = 40000
    num_label = 2

    x, y = load_dataset("data/amazon_reviews_multilingual_JP_v1_00.tsv")

    x = preprocess_dataset(x)
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=42)
    vocab = build_vocabulary(x_train, num_words)
    x_train = vocab.texts_to_sequences(x_train)
    x_test = vocab.texts_to_sequences(x_test)
    x_train = pad_sequences(x_train, maxlen=maxlen, truncating="post")
    x_test = pad_sequences(x_test, maxlen=maxlen, truncating="post")

    wv = load_fasttext("data/cc.ja.300.vec.gz")
    wv = filter_embeddings(wv, vocab.word_index, num_words)

    model = CNNModel(num_words, num_label, embeddings=wv).build()
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["acc"])

    callbacks = [
        EarlyStopping(patience=3),
        ModelCheckpoint(model_path, save_best_only=True)
    ]

    model.fit(x=x_train,
              y=y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=0.2,
              callbacks=callbacks,
              shuffle=True)

    model = load_model(model_path)
    api = InferenceAPI(model, vocab, preprocess_dataset)
    y_pred = api.predict_from_sequences(x_test)
    print("precision: {:.4f}".format(
        precision_score(y_test, y_pred, average="binary")))
    print("recall: {:.4f}".format(
        recall_score(y_test, y_pred, average="binary")))
    print("f1: {:.4f}".format(f1_score(y_test, y_pred, average="binary")))
Example #3
File: test_utils.py  Project: yukw777/gata
def test_load_fasttext():
    preprocessor = SpacyPreprocessor([PAD, UNK, "my", "name", "is", "peter"])
    emb = load_fasttext("test-data/test-fasttext.vec", preprocessor)
    word_ids, _ = preprocessor.preprocess_tokenized([
        ["hi", "there", "what's", "your", "name"],
        ["my", "name", "is", "peter"],
    ])
    embedded = emb(word_ids)
    # OOVs
    assert embedded[0, :4].equal(
        emb(torch.tensor(preprocessor.unk_id)).unsqueeze(0).expand(4, -1))
    # name
    assert embedded[0, 4].equal(emb(torch.tensor(3)))
    # my name is peter
    assert embedded[1, :4].equal(emb(torch.tensor([2, 3, 4, 5])))
    # pad, should be zero
    assert embedded[1, 4].equal(torch.zeros(300))
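
The test above treats `load_fasttext` as a factory that returns a frozen embedding layer aligned with the preprocessor's vocabulary, with the PAD row kept at zero. A minimal sketch under those assumptions (the function name and the attribute `word_to_id_dict` are hypothetical; only `pad_id` and `unk_id` appear in the original test):

import torch
import torch.nn as nn


def load_fasttext_sketch(vec_path, preprocessor, emb_dim=300):
    # Build a frozen nn.Embedding from a fastText .vec file, aligned with the
    # preprocessor's vocabulary; the PAD row is left at zero.
    vocab = preprocessor.word_to_id_dict  # hypothetical attribute name
    weights = torch.zeros(len(vocab), emb_dim)
    with open(vec_path, encoding="utf-8") as f:
        next(f)  # skip the "<num_words> <dim>" header line
        for line in f:
            fields = line.rstrip().split(" ")
            word, values = fields[0], fields[1:]
            if word in vocab and vocab[word] != preprocessor.pad_id:
                weights[vocab[word]] = torch.tensor([float(v) for v in values])
    return nn.Embedding.from_pretrained(weights, freeze=True,
                                        padding_idx=preprocessor.pad_id)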
Example #4
def main():
    # Set hyperparameters
    batch_size = 128
    epochs = 100
    maxlen = 300
    # model_path = 'models/rnn_model.h5'
    # model_path = 'models/lstm_model.h5'
    # model_path = 'models/CNN_model.h5'
    model_path = 'models/lstm_iniemb_model.h5'
    num_words = 4000
    num_label = 2

    # Load the dataset
    x, y = load_dataset('data/amazon_reviews_multilingual_JP_v1_00.tsv')

    # Preprocess the dataset
    x = preprocess_dataset(x)
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=42)
    vocab = build_vocabulary(x_train, num_words)
    x_train = vocab.texts_to_sequences(x_train)
    x_test = vocab.texts_to_sequences(x_test)
    x_train = pad_sequences(x_train, maxlen=maxlen, truncating='post')
    x_test = pad_sequences(x_test, maxlen=maxlen, truncating='post')

    # Pretrained word embeddings
    wv = load_fasttext('data/cc.ja.300.vec')
    wv = filter_embeddings(wv, vocab.word_index, num_words)

    # Build the model
    # model = RNNModel(num_words, num_label, embeddings=None).build()
    model = LSTMModel(num_words, num_label, embeddings=wv).build()
    # model = CNNModel(num_words, num_label, embeddings=None).build()
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])

    # Prepare callbacks
    callbacks = [
        EarlyStopping(patience=3),
        ModelCheckpoint(model_path, save_best_only=True)
    ]

    # Train the model
    model.fit(x=x_train,
              y=y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=0.2,
              callbacks=callbacks,
              shuffle=True)

    # Prediction
    model = load_model(model_path)
    api = InferenceAPI(model, vocab, preprocess_dataset)
    y_pred = api.predict_from_sequences(x_test)
    print('precision: {:.4f}'.format(
        precision_score(y_test, y_pred, average='binary')))
    print('recall: {:.4f}'.format(
        recall_score(y_test, y_pred, average='binary')))
    print('f1: {:.4f}'.format(f1_score(y_test, y_pred, average='binary')))
Example #5
File: main.py  Project: shengchen-liu/Quora
def main():

    # 4.1 mkdirs
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep + 'fold_'+str(config.fold)):
        os.makedirs(config.weights + config.model_name + os.sep + 'fold_'+ str(config.fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists(config.logs):
        os.mkdir(config.logs)
    if not os.path.exists(config.best_models + config.model_name ):
        os.mkdir(config.best_models + config.model_name)
    if not os.path.exists(config.best_models + config.model_name + os.sep + 'fold_'+str(config.fold)):
        os.mkdir(config.best_models + config.model_name + os.sep + 'fold_'+str(config.fold))
    tqdm.pandas()

    start_time = time.time()
    train_X, test_X, train_y, word_index = utils.load_and_prec(config)

    print("Start embedding matrix............")
    embedding_matrix_1 = utils.load_glove(word_index, config.embedding_dir, config.max_features)
    embedding_matrix_2 = utils.load_para(word_index, config.embedding_dir, config.max_features)
    embedding_matrix_3 = utils.load_fasttext(word_index, config.embedding_dir, config.max_features)

    total_time = (time.time() - start_time) / 60
    print("Took {:.2f} minutes".format(total_time))

    if config.embed_method == "mean":
        embedding_matrix = np.mean([embedding_matrix_1, embedding_matrix_2, embedding_matrix_3], axis=0)
    elif config.embed_method == "concat":
        embedding_matrix = np.concatenate((embedding_matrix_1, embedding_matrix_2, embedding_matrix_3), axis=1)
    print(np.shape(embedding_matrix))
    #
    # del embedding_matrix_1, embedding_matrix_2
    # del embedding_matrix_1

    # -------------------------------------------------------
    # training
    # -------------------------------------------------------
    train_preds = np.zeros((len(train_X)))
    test_preds = np.zeros((len(test_X)))

    x_test_cuda = torch.tensor(test_X, dtype=torch.long).cuda()
    test_dataset = torch.utils.data.TensorDataset(x_test_cuda)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False)

    splits = list(StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED).split(train_X, train_y))

    sigmoid = nn.Sigmoid()
    loss_fn = torch.nn.BCEWithLogitsLoss(reduction="mean")



    # k-fold
    for fold, (train_idx, valid_idx) in enumerate(splits):
        print(f'Fold {fold + 1}')

        # tflogger
        tflogger = utils.TFLogger(os.path.join('../results', 'TFlogs',
                                         config.model_name + "_fold{0}_{1}".format(config.fold, fold)))
        # initialize the early_stopping object
        early_stopping = utils.EarlyStopping(patience=7, verbose=True)

        x_train_fold = torch.tensor(train_X[train_idx], dtype=torch.long).cuda()
        y_train_fold = torch.tensor(train_y[train_idx, np.newaxis], dtype=torch.float32).cuda()
        x_val_fold = torch.tensor(train_X[valid_idx], dtype=torch.long).cuda()
        y_val_fold = torch.tensor(train_y[valid_idx, np.newaxis], dtype=torch.float32).cuda()

        if config.model == "baseline_bidir_LSTM_GRU":
            model = baseline_bidir_LSTM_GRU.NeuralNet(config, embedding_matrix)
        elif config.model == "baseline_pytorch":
            model = baseline_pytorch.NeuralNet(config, embedding_matrix)
        elif config.model == "baseline_lstm_gru_attention":
            model = baseline_lstm_gru_attention.NeuralNet(config, embedding_matrix)
        elif config.model == "baseline_lstm_lstm_attention":
            model = baseline_lstm_lstm_attention.NeuralNet(config, embedding_matrix)
            
        model.cuda()

        optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

        # scheduler
        scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

        train_dataset = torch.utils.data.TensorDataset(x_train_fold, y_train_fold)
        valid_dataset = torch.utils.data.TensorDataset(x_val_fold, y_val_fold)

        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=config.batch_size, shuffle=False)

        valid_loss = np.inf

        # initialize best loss
        best_loss = np.inf
        start_time = timer()
        for epoch in range(config.epochs):
            scheduler.step(epoch)
            # train
            lr = utils.get_learning_rate(optimizer)
            train_loss = train(train_loader=train_loader,model=model,loss_fn=loss_fn, optimizer=optimizer,
                               epoch=epoch,valid_loss=valid_loss,start=start_time)

            # validate
            valid_loss, valid_output = evaluate(val_loader=valid_loader, model=model, loss_fn=loss_fn, epoch=epoch,
                                                train_loss=train_loss, start_time=start_time)
            test_preds_fold = np.zeros(len(test_X))

            # check results
            is_best_loss = valid_loss < best_loss
            if is_best_loss:
                best_epoch = epoch
                best_train_loss = train_loss
            # update best loss
            best_loss = min(valid_loss, best_loss)

            # save NeuralNet
            utils.save_checkpoint({
                "epoch": epoch,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "fold": config.fold,
                "kfold": config.fold,
            },is_best_loss, config.fold, fold, config)
            # print logs
            print('\r', end='', flush=True)

            message = '%s  %5.1f %6.1f  %.2E |       %0.3f        |       %0.3f       | %s' % ( \
                "best", best_epoch, best_epoch, Decimal(lr),
                best_train_loss,
                best_loss,
                utils.time_to_str((timer() - start_time), 'min'))
            log.write(message)

            log.write("\n")
            time.sleep(0.01)

            # ================================================================== #
            #                        Tensorboard Logging                         #
            # ================================================================== #

            # 1. Log scalar values (scalar summary)
            info = {'Train_loss': train_loss,
                    'Valid_loss': valid_loss,
                    'Learning_rate': lr}

            for tag, value in info.items():
                tflogger.scalar_summary(tag, value, epoch)

            # 2. Log values and gradients of the parameters (histogram summary)
            for tag, value in model.named_parameters():
                tag = tag.replace('.', '/')
                tflogger.histo_summary(tag, value.data.cpu().numpy(), epoch)
                if value.grad is not None:
                    tflogger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(), epoch)
            # -------------------------------------
            # end tflogger

            # ================================================================== #
            #                        Early stopping                         #
            # ================================================================== #
            # early_stopping needs the validation loss to check if it has decreased,
            # and if it has, it will make a checkpoint of the current NeuralNet
            early_stopping(valid_loss, model)

            if early_stopping.early_stop:
                print("Early stopping")
                break

        # end looping all epochs
        train_preds[valid_idx] = sigmoid(valid_output).cpu().data.numpy()[:, 0]

        # test
        checkpoint_path = os.path.join("{0}{1}/fold_{2}/fold_{3}_model_best_loss.pth.tar".
                        format(config.best_models, config.model_name, str(config.fold), fold))

        best_model = torch.load(checkpoint_path)
        print("Test on epoch:", best_model['epoch'])
        model.load_state_dict(best_model["state_dict"])
        test_preds_fold = test(test_loader=test_loader, model=model)
        test_preds += test_preds_fold / len(splits)

    # end k-fold
    search_result = threshold_search(train_y, train_preds)
    print(search_result)
    log.write("Threshold:{0},    f1:{1}".format(search_result['threshold'], search_result['f1']))

    sub = pd.read_csv('../input/sample_submission.csv')
    sub.prediction = test_preds > search_result['threshold']
    sub.to_csv("submission_{0}.csv".format(config.model_name), index=False)

    print('Test successful!')
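
`threshold_search` is not shown in this listing; from its use above it takes the true labels and the out-of-fold probabilities and returns a dict with 'threshold' and 'f1' keys. A minimal sketch, assuming a simple grid scan over candidate thresholds:

import numpy as np
from sklearn.metrics import f1_score


def threshold_search(y_true, y_proba):
    # Scan candidate thresholds and keep the one that maximizes F1.
    best = {'threshold': 0.5, 'f1': 0.0}
    for threshold in np.arange(0.1, 0.51, 0.01):
        score = f1_score(y_true, y_proba > threshold)
        if score > best['f1']:
            best = {'threshold': round(float(threshold), 2), 'f1': score}
    return best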
Example #6
def main(args):
    print "loadding data and labels from dataset"
    train = pd.read_csv(args.train_dir)
    ch_train = pd.read_csv(args.chtrain_dir)
    x_train = train["comment_text"]
    x_chtrain = ch_train["comment_text"]
    target_cols = [
        'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'
    ]

    x = []
    x_ch = []
    for line in x_train:
        if len(line) > 0:
            x.append(utils.review_to_wordlist(line.strip()))
    print "loaded %d comments from dataset" % len(x)
    for line in x_chtrain:
        if len(line) > 0:
            x_ch.append(utils.review_to_wordlist_char(line.strip()))
    print "loaded %d comments from dataset" % len(x)
    y = train[target_cols].values

    index2word, word2index = utils.load_vocab(args.vocab_dir)
    index2char, char2index = utils.load_char(args.char_dir)
    x_vector = utils.vectorize(x, word2index, verbose=False)
    x_vector = np.array(x_vector)
    char_vector = utils.vectorize_char(x_ch, char2index, verbose=False)
    char_vector = np.array(char_vector)
    print char_vector[0]

    save_dir = os.path.join(args.save_dir, args.model_type)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    if args.model_type in ["cnn", "cnnfe", "chcnn", "chcnn2"]:
        max_step = args.max_step_cnn
        max_size = args.max_size_cnn
        nb_epochs = args.nb_epochs_cnn
    elif args.model_type in [
            "rnn", "rnnfe", "rnnfe2", "chrnn", "chrnnfe", "rcnn"
    ]:
        max_step = args.max_step_rnn
        max_size = args.max_size_rnn
        nb_epochs = args.nb_epochs_rnn

    ex_features = add_features("../data/train.csv")
    nfolds = args.nfolds
    skf = KFold(n_splits=nfolds, shuffle=True, random_state=2018)
    test_prob = []
    stack_logits = np.zeros((len(x_vector), len(target_cols)))
    for (f, (train_index, test_index)) in enumerate(skf.split(x_vector)):
        x_train, x_eval = x_vector[train_index], x_vector[test_index]
        char_train, char_eval = char_vector[train_index], char_vector[
            test_index]
        y_train, y_eval = y[train_index], y[test_index]
        with tf.Graph().as_default():
            config_proto = utils.get_config_proto()
            sess = tf.Session(config=config_proto)
            if args.model_type == "cnn":
                model = TextCNN(args, "TextCNN")
            elif args.model_type == "cnnfe":
                model = TextCNNFE(args, "TextCNNFE")
            elif args.model_type == "rnn":
                model = TextRNN(args, "TextRNN")
            elif args.model_type == "rnnfe":
                model = TextRNNFE(args, "TextRNNFE")
            elif args.model_type == "rcnn":
                model = TextRCNN(args, "TextRCNN")
            elif args.model_type == "attention":
                model = RNNWithAttention(args, "Attention")
            elif args.model_type == "chrnn":
                model = TextRNNChar(args, "TextRNNChar")
            elif args.model_type == "chcnn":
                model = TextCNNChar(args, "TextCNNChar")
            elif args.model_type == "chcnn2":
                model = TextCNNChar(args, "TextCNNChar2")
            elif args.model_type == "rnnfe2":
                model = TextRNNFE2(args, "TextCNNCharFE2")
            elif args.model_type == "chrnnfe":
                model = TextRNNCharFE(args, "TextCNNCharFE")
            else:
                raise ValueError("Unknown model_type %s" % args.model_type)
            sess.run(tf.global_variables_initializer())

            if args.use_ft:
                pretrain_dir = args.ft_dir
                print "use FastText word vector"
                embedding = utils.load_fasttext(pretrain_dir, index2word)
            if not args.use_ft:
                pretrain_dir = args.glove_dir
                print "use Glove word vector"
                embedding = utils.load_glove(pretrain_dir, index2word)
            sess.run(model.embedding_init,
                     {model.embedding_placeholder: embedding})

            for line in model.tvars:
                print line

            print "training %s model for toxic comments classification" % (
                args.model_type)
            print "%d fold start training" % f
            for epoch in range(1, nb_epochs + 1):
                print "epoch %d start with lr %f" % (
                    epoch,
                    model.learning_rate.eval(session=sess)), "\n", "- " * 50
                loss, total_comments = 0.0, 0
                if args.model_type in ["cnn", "rnn", "rcnn"]:
                    train_batch = utils.get_batches(x_train, y_train,
                                                    args.batch_size,
                                                    args.max_len)
                    valid_batch = utils.get_batches(x_eval, y_eval, max_size,
                                                    args.max_len, False)

                elif args.model_type in ["chrnn", "chcnn", "chcnn2"]:
                    train_batch = utils.get_batches_with_char(
                        x_train, char_train, y_train, args.batch_size,
                        args.max_len)
                    valid_batch = utils.get_batches_with_char(
                        x_eval, char_eval, y_eval, max_size, args.max_len,
                        False)

                elif args.model_type in ["rnnfe", "cnnfe", "rnnfe2"]:
                    train_batch = utils.get_batches_with_fe(
                        x_train, y_train, ex_features, args.batch_size,
                        args.max_len)
                    valid_batch = utils.get_batches_with_fe(
                        x_eval, y_eval, ex_features, max_size, args.max_len,
                        False)

                elif args.model_type in ["chrnnfe"]:
                    train_batch = utils.get_batches_with_charfe(
                        x_train, char_train, y_train, ex_features,
                        args.batch_size, args.max_len)
                    valid_batch = utils.get_batches_with_charfe(
                        x_eval, char_eval, y_eval, ex_features, max_size,
                        args.max_len, False)

                epoch_start_time = time.time()
                step_start_time = epoch_start_time
                for idx, batch in enumerate(train_batch):
                    if args.model_type in ["cnn", "rnn", "rcnn"]:
                        comments, comments_length, labels = batch
                        _, loss_t, global_step, batch_size = model.train(
                            sess, comments, comments_length, labels)

                    elif args.model_type in ["chrnn", "chcnn", "chcnn2"]:
                        comments, comments_length, chs, labels = batch
                        _, loss_t, global_step, batch_size = model.train(
                            sess, comments, comments_length, chs, labels)

                    elif args.model_type in ["rnnfe", "cnnfe", "rnnfe2"]:
                        comments, comments_length, exs, labels = batch
                        _, loss_t, global_step, batch_size = model.train(
                            sess, comments, comments_length, labels, exs)

                    elif args.model_type in ["chrnnfe"]:
                        comments, comments_length, chs, exs, labels = batch
                        _, loss_t, global_step, batch_size = model.train(
                            sess, comments, comments_length, chs, labels, exs)

                    loss += loss_t * batch_size
                    total_comments += batch_size

                    if global_step % 200 == 0:
                        print "epoch %d step %d loss %f time %.2fs" % (
                            epoch, global_step, loss_t,
                            time.time() - step_start_time)

                    if global_step % 200 == 0:
                        _ = run_valid(valid_batch, model, sess,
                                      args.model_type)
                        # model.saver.save(sess, os.path.join(save_dir, "model.ckpt"), global_step=global_step)
                        step_start_time = time.time()

                epoch_time = time.time() - epoch_start_time
                sess.run(model.learning_rate_decay_op)
                print "%.2f seconds in this epoch with train loss %f" % (
                    epoch_time, loss / total_comments)

            test_prob.append(run_test(args, model, sess))
            stack_logits[test_index] = run_valid(valid_batch, model, sess,
                                                 args.model_type)

    preds = np.zeros((test_prob[0].shape[0], len(target_cols)))
    for prob in test_prob:
        preds += prob
        print prob[0]
    preds /= len(test_prob)
    print len(test_prob)
    write_predict(stack_logits, args.model_type)
    write_results(preds, args.model_type)
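
Here `utils.load_fasttext(pretrain_dir, index2word)` is expected to return a dense matrix with one row per entry of `index2word`, which is then fed to `model.embedding_placeholder`. A minimal sketch, assuming `index2word` is a list of words and 300-dimensional vectors (both assumptions, not the project's actual helper):

import io
import numpy as np


def load_fasttext(pretrain_dir, index2word, emb_dim=300):
    # Read fastText vectors from a .vec file and align them with index2word;
    # words without a pretrained vector keep a small random initialization.
    vectors = {}
    with io.open(pretrain_dir, encoding="utf-8") as f:
        for line in f:
            fields = line.rstrip().split(" ")
            if len(fields) <= 2:  # skip the "<num_words> <dim>" header line
                continue
            vectors[fields[0]] = np.asarray(fields[1:], dtype=np.float32)
    embedding = np.random.uniform(-0.1, 0.1,
                                  (len(index2word), emb_dim)).astype(np.float32)
    for idx, word in enumerate(index2word):
        if word in vectors:
            embedding[idx] = vectors[word]
    return embedding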
Example #7
    def __init__(
        self,
        hidden_dim: int = 8,
        word_emb_dim: int = 300,
        node_emb_dim: int = 12,
        relation_emb_dim: int = 10,
        text_encoder_num_blocks: int = 1,
        text_encoder_num_conv_layers: int = 3,
        text_encoder_kernel_size: int = 5,
        text_encoder_num_heads: int = 1,
        graph_encoder_num_cov_layers: int = 4,
        graph_encoder_num_bases: int = 3,
        text_decoder_num_blocks: int = 1,
        text_decoder_num_heads: int = 1,
        learning_rate: float = 5e-4,
        sample_k_gen_obs: int = 5,
        max_decode_len: int = 200,
        steps_for_lr_warmup: int = 10000,
        pretrained_word_embedding_path: Optional[str] = None,
        word_vocab_path: Optional[str] = None,
        node_vocab_path: Optional[str] = None,
        relation_vocab_path: Optional[str] = None,
        **kwargs,
    ) -> None:
        super().__init__()
        self.save_hyperparameters(
            "hidden_dim",
            "word_emb_dim",
            "node_emb_dim",
            "relation_emb_dim",
            "text_encoder_num_blocks",
            "text_encoder_num_conv_layers",
            "text_encoder_kernel_size",
            "text_encoder_num_heads",
            "graph_encoder_num_cov_layers",
            "graph_encoder_num_bases",
            "text_decoder_num_blocks",
            "text_decoder_num_heads",
            "learning_rate",
            "sample_k_gen_obs",
            "max_decode_len",
            "steps_for_lr_warmup",
        )

        # initialize word (preprocessor), node and relation stuff
        (
            node_name_word_ids,
            node_name_mask,
            rel_name_word_ids,
            rel_name_mask,
        ) = self.init_word_node_rel(
            word_vocab_path=to_absolute_path(word_vocab_path)
            if word_vocab_path is not None else None,
            node_vocab_path=to_absolute_path(node_vocab_path)
            if node_vocab_path is not None else None,
            relation_vocab_path=to_absolute_path(relation_vocab_path)
            if relation_vocab_path is not None else None,
        )

        # load pretrained word embedding and freeze it
        if pretrained_word_embedding_path is not None:
            pretrained_word_embedding = load_fasttext(
                to_absolute_path(pretrained_word_embedding_path),
                self.preprocessor)
        else:
            pretrained_word_embedding = nn.Embedding(self.num_words,
                                                     word_emb_dim)
        pretrained_word_embedding.weight.requires_grad = False

        # graph updater
        self.graph_updater = GraphUpdater(
            self.hparams.hidden_dim,  # type: ignore
            self.hparams.word_emb_dim,  # type: ignore
            len(self.node_vocab),
            self.hparams.node_emb_dim,  # type: ignore
            len(self.relation_vocab),
            self.hparams.relation_emb_dim,  # type: ignore
            self.hparams.text_encoder_num_blocks,  # type: ignore
            self.hparams.text_encoder_num_conv_layers,  # type: ignore
            self.hparams.text_encoder_kernel_size,  # type: ignore
            self.hparams.text_encoder_num_heads,  # type: ignore
            self.hparams.graph_encoder_num_cov_layers,  # type: ignore
            self.hparams.graph_encoder_num_bases,  # type: ignore
            pretrained_word_embedding,
            node_name_word_ids,
            node_name_mask,
            rel_name_word_ids,
            rel_name_mask,
        )
        self.graph_updater.pretraining = True

        # text decoder
        self.text_decoder = TextDecoder(text_decoder_num_blocks, hidden_dim,
                                        text_decoder_num_heads)
        self.target_word_prj = nn.Linear(hidden_dim,
                                         self.num_words,
                                         bias=False)
        self.ce_loss = nn.CrossEntropyLoss(
            ignore_index=self.preprocessor.pad_id, reduction="none")