Пример #1
0
def eval():
    """Translate the test set with the latest checkpoint and report BLEU.

    Restores the newest checkpoint found in ``hp.logdir``, decodes the test
    data batch by batch, writes source / expected / got triples to
    ``results/<model name>`` and appends the corpus-level BLEU score to the
    same file.

    Only ``len(X) // hp.batch_size`` full batches are decoded; a trailing
    partial batch is skipped.
    """
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, Sources, Targets = load_test_data()
    de2idx, idx2de = load_vocab('src')
    en2idx, idx2en = load_vocab('trg')

    # Start session
    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ## Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            ## Get model name: the first double-quoted token of the checkpoint
            ## index file. The context manager closes the handle right away
            ## instead of leaving it to the garbage collector.
            with open(hp.logdir + '/checkpoint', 'r') as ckpt_file:
                mname = ckpt_file.read().split('"')[1]

            ## Inference
            if not os.path.exists('results'): os.mkdir('results')
            with codecs.open("results/" + mname, "w", "utf-8") as fout:
                list_of_refs, hypotheses = [], []
                for i in range(len(X) // hp.batch_size):

                    ### Get mini-batches
                    x = X[i*hp.batch_size: (i+1)*hp.batch_size]
                    sources = Sources[i*hp.batch_size: (i+1)*hp.batch_size]
                    targets = Targets[i*hp.batch_size: (i+1)*hp.batch_size]

                    ### Autoregressive inference: column j of the prediction
                    ### matrix is filled from the j-th run, each run being fed
                    ### everything predicted so far.
                    preds = np.zeros((hp.batch_size, hp.maxlen), np.int32)
                    for j in range(hp.maxlen):
                        _preds = sess.run(g.preds, {g.x: x, g.y: preds})
                        preds[:, j] = _preds[:, j]

                    ### Write to file
                    for source, target, pred in zip(sources, targets, preds): # sentence-wise
                        # Keep only the text before the first end-of-sentence marker.
                        got = " ".join(idx2en[idx] for idx in pred).split("</S>")[0].strip()
                        fout.write("- source: " + source +"\n")
                        fout.write("- expected: " + target + "\n")
                        fout.write("- got: " + got + "\n\n")
                        fout.flush()

                        # bleu score: only pairs where both sides have more
                        # than three tokens take part in the corpus score.
                        ref = target.split()
                        hypothesis = got.split()
                        if len(ref) > 3 and len(hypothesis) > 3:
                            list_of_refs.append([ref])
                            hypotheses.append(hypothesis)

                ## Calculate bleu score
                score = corpus_bleu(list_of_refs, hypotheses)
                fout.write("Bleu Score = " + str(100*score))
Пример #2
0
 def __init__(self, hp):
     """Store the hyperparameters, load both vocabularies, build embeddings.

     ``hp`` must provide ``en_vocab``, ``ch_vocab``, ``en_vocab_size``,
     ``ch_vocab_size`` and ``d_model``.
     """
     self.hp = hp

     # Token <-> index lookups, one pair per vocabulary file.
     en_maps = load_vocab(hp.en_vocab)
     self.en_token2idx, self.en_idx2token = en_maps
     ch_maps = load_vocab(hp.ch_vocab)
     self.ch_token2idx, self.ch_idx2token = ch_maps

     # A single call yields the embedding tables for both vocabularies.
     embedding_pair = get_token_embeddings(self.hp.en_vocab_size,
                                           self.hp.ch_vocab_size,
                                           self.hp.d_model,
                                           zero_pad=True)
     self.en_embeddings, self.ch_embeddings = embedding_pair
Пример #3
0
def load_model(model_path):
    """Load a trained Transformer plus the vocabularies needed for inference.

    Args:
        model_path: Path to a state dict readable by ``torch.load``.

    Returns:
        Tuple ``(model, source2idx, idx2target)``: the model in eval mode
        (moved to the GPU when CUDA is available, otherwise left on the
        CPU), the source token-to-index mapping and the target
        index-to-token mapping.
    """
    # Load data
    source2idx, _ = load_vocab(params.src_vocab)
    target2idx, idx2target = load_vocab(params.tgt_vocab)
    encoder_vocab = len(source2idx)
    decoder_vocab = len(target2idx)

    # load model
    model = Transformer(params, encoder_vocab, decoder_vocab)
    # Deserialize onto the CPU first so that a checkpoint written on a GPU
    # machine can still be opened on a CPU-only host; the weights are moved
    # to the GPU afterwards when one is present.
    state_dict = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state_dict)
    print('Model Loaded.')
    model.eval()
    if torch.cuda.is_available():
        model.cuda()

    return model, source2idx, idx2target
Пример #4
0
def load_distinct_data(mode="train"):
    """Read ``hp.data`` and return its sentences as shuffled byte strings.

    For every line the first space-separated field is dropped, the rest is
    normalized and split into words. A sentence is kept only if it has at
    most ``hp.maxlen`` words and none of its words maps to id 0.

    Args:
        mode: Currently unused. The train/test split it used to select is
            disabled, so every kept sentence is returned.

    Returns:
        A shuffled list of byte strings, each holding the raw int32 buffer
        of one sentence's word ids.
    """
    word2idx, _ = load_vocab()

    Y = []

    # The context manager closes the corpus file even if a line fails to
    # process; the original left the handle open.
    with codecs.open(hp.data, 'r', 'utf-8') as fin:
        for line in fin:
            fields = line.strip().split(" ")
            words = normalize(' '.join(fields[1:])).split()

            if len(words) > hp.maxlen:
                continue

            # Words missing from the vocabulary fall back to id 0.
            sent_ids = [word2idx.get(word, 0) for word in words]
            if 0 in sent_ids:
                # We do not include a sentence if it has any unknown words.
                continue

            # tobytes() returns the same bytes as the deprecated tostring().
            Y.append(np.array(sent_ids, np.int32).tobytes())

    random.shuffle(Y)
    print("# Y =", len(Y))
    return Y
Пример #5
0
def main():
    """Interactively convert pinyin read from stdin into Chinese characters.

    Restores the latest checkpoint from ``hp.logdir`` and then loops forever:
    reads a line, rejects it when it is longer than ``hp.maxlen``, runs the
    model on it and prints the decoded prediction.
    """
    g = Graph(is_training=False)

    # Load vocab
    pnyn2idx, idx2pnyn, hanzi2idx, idx2hanzi = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name (first double-quoted token of the checkpoint
            # index). Read through a context manager so the handle is closed;
            # the value itself is not used further down.
            with open(hp.logdir + '/checkpoint', 'r') as ckpt_file:
                mname = ckpt_file.read().split('"')[1]  # model name

            while True:
                line = input("请输入测试拼音:")

                if len(line) > hp.maxlen:
                    print('最长拼音不能超过50')
                    continue
                x = load_test_string(pnyn2idx, line)
                preds = sess.run(g.preds, {g.x: x})
                # Decode the first (only) sequence, cut it to the number of
                # non-zero input ids, then drop the "_" placeholders.
                decoded = "".join(idx2hanzi[idx] for idx in preds[0])
                got = decoded[:np.count_nonzero(x[0])].replace("_", "")
                print(got)
Пример #6
0
    def __init__(self, hp):
        """Build the model graph: vocabulary, embeddings, inputs and ops.

        Args:
            hp: Hyperparameter object. ``vocab``, ``preembedding``,
                ``vec_path``, ``d_model``, ``maxlen`` and ``num_class`` are
                read here; ``vocab_size`` is overwritten with the third
                value returned by ``load_vocab``.
        """
        self.hp = hp
        # load_vocab also reports the vocabulary size, which is stored back
        # onto the shared hp object.
        self.token2idx, self.idx2token, self.hp.vocab_size = load_vocab(
            hp.vocab)
        # Pre-trained vectors are loaded only when hp.preembedding is truthy;
        # otherwise None is handed to get_token_embeddings.
        self.embd = None
        if self.hp.preembedding:
            self.embd = loadGloVe(self.hp.vec_path)
        self.embeddings = get_token_embeddings(self.embd,
                                               self.hp.vocab_size,
                                               self.hp.d_model,
                                               zero_pad=False)
        # Three int32 id inputs of width hp.maxlen.
        # NOTE(review): presumably supervised / original / augmented text
        # batches -- confirm against the feeding code.
        self.input_sup = tf.placeholder(tf.int32, [None, self.hp.maxlen],
                                        name="input_sup")
        self.input_ori = tf.placeholder(tf.int32, [None, self.hp.maxlen],
                                        name="input_ori")
        self.input_aug = tf.placeholder(tf.int32, [None, self.hp.maxlen],
                                        name="input_aug")
        # One int32 value per example for each of the three inputs (unnamed
        # placeholders, so their graph names depend on creation order).
        self.sup_len = tf.placeholder(tf.int32, [None])
        self.ori_len = tf.placeholder(tf.int32, [None])
        self.aug_len = tf.placeholder(tf.int32, [None])
        # int32 labels with hp.num_class columns per example.
        self.truth = tf.placeholder(tf.int32, [None, self.hp.num_class],
                                    name="truth")
        # Boolean switch fed at run time.
        self.is_training = tf.placeholder(tf.bool,
                                          shape=None,
                                          name="is_training")
        self.model = True

        # self.logits_sup, self.logits_ori, self.logits_aug = self._logits_op()
        # The remaining ops are built by helper methods of this class, in
        # this order.
        self.loss = self._loss_op()
        self.acc = self._acc_op()
        self.global_step = self._globalStep_op()
        self.train = self._training_op()
Пример #7
0
 def __init__(self, hp):
     """Keep the hyperparameters, load the vocabulary, build embeddings."""
     self.hp = hp
     vocab_maps = load_vocab(hp.vocab)
     self.token2idx, self.idx2token = vocab_maps
     # Character (token) vectors: map every character to be translated
     # into the target vocabulary.
     self.embeddings = get_token_embeddings(
         self.hp.vocab_size, self.hp.d_model, zero_pad=True)
Пример #8
0
def main():
    """Run an interactive pinyin-to-hanzi prompt on the latest checkpoint.

    After restoring the parameters from ``hp.logdir`` the function loops
    without end: it reads one line from stdin, skips it when it is longer
    than ``hp.maxlen``, and otherwise prints the model's decoded output.
    """
    g = Graph(is_training=False)

    # Load vocab
    pnyn2idx, idx2pnyn, hanzi2idx, idx2hanzi = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name. The checkpoint index is read inside a ``with``
            # block so the file handle is released immediately; the name is
            # not referenced again below.
            with open(hp.logdir + '/checkpoint', 'r') as checkpoint_index:
                mname = checkpoint_index.read().split('"')[1] # model name

            while True:
                line = input("请输入测试拼音:")
                if len(line) > hp.maxlen:
                    print('最长拼音不能超过50')
                    continue
                x = load_test_string(pnyn2idx, line)
                preds = sess.run(g.preds, {g.x: x})
                # Join the predicted characters of the first sequence, keep
                # as many as there are non-zero input ids, strip "_".
                n_valid = np.count_nonzero(x[0])
                got = "".join(idx2hanzi[idx] for idx in preds[0])[:n_valid].replace("_", "")
                print(got)
Пример #9
0
    def _net1(self):
        """Build the ``net1`` sub-graph on top of ``self.x_mfcc``.

        Returns:
            Tuple ``(ppgs, preds, logits)``: the softmax of the logits
            divided by ``hp.Train1.t``, the int32 argmax of the logits over
            the last axis, and the raw dense-layer logits.
        """
        with tf.variable_scope('net1'):
            # Only the size of the vocabulary is needed in this method.
            phn2idx, _ = load_vocab()

            # Pre-net
            prenet_units = [
                hp.Train1.hidden_units,
                hp.Train1.hidden_units // 2,
            ]
            prenet_out = prenet(
                self.x_mfcc,
                num_units=prenet_units,
                dropout_rate=hp.Train1.dropout_rate,
                is_training=self.is_training,
            )  # (N, T, E/2)

            # CBHG
            out = cbhg(
                prenet_out,
                hp.Train1.num_banks,
                hp.Train1.hidden_units // 2,
                hp.Train1.num_highway_blocks,
                hp.Train1.norm_type,
                self.is_training,
            )

            # Final linear projection: one logit per vocabulary entry.
            logits = tf.layers.dense(out, len(phn2idx))  # (N, T, V)
            scaled_logits = logits / hp.Train1.t
            ppgs = tf.nn.softmax(scaled_logits)  # (N, T, V)
            best = tf.arg_max(logits, dimension=-1)
            preds = tf.to_int32(best)  # (N, T)

        return ppgs, preds, logits
Пример #10
0
def eval(logdir):
    """Evaluate Net1 on one batch and write the summaries to ``logdir``.

    Restores the latest checkpoint in ``logdir`` when one exists, runs a
    single batch from the test dataflow through the predictor, and writes
    the loss, accuracy and confusion-matrix summaries.

    Args:
        logdir: Directory holding the checkpoints; summaries are written
            to the same place.
    """
    # Load graph
    model = Net1()

    # dataflow
    df = Net1DataFlow(hp.Test1.data_path, hp.Test1.batch_size)

    ckpt = tf.train.latest_checkpoint(logdir)

    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names(),
    )
    # Without a checkpoint the predictor runs on whatever the session
    # initializes by default.
    if ckpt:
        pred_conf.session_init = SaverRestore(ckpt)
    predictor = OfflinePredictor(pred_conf)

    x_mfccs, y_ppgs = next(df().get_data())
    outputs = predictor(x_mfccs, y_ppgs)
    y_ppg_1d, pred_ppg_1d, summ_loss, summ_acc = outputs

    # plot confusion matrix: turn indices into their vocabulary symbols first
    _, idx2phn = load_vocab()
    true_phns = [idx2phn[i] for i in y_ppg_1d]
    pred_phns = [idx2phn[i] for i in pred_ppg_1d]
    summ_cm = plot_confusion_matrix(true_phns, pred_phns, phns)

    writer = tf.summary.FileWriter(logdir)
    for summary in (summ_loss, summ_acc, summ_cm):
        writer.add_summary(summary)
    writer.close()
Пример #11
0
def plot_alignment(alignment, epoch, eng_name, kor_name):
    """Plots the alignment and saves it as a PNG.

    alignment: A (numpy) matrix of shape (encoder_steps, decoder_steps)
    epoch: epochs, used in the title and in the output file name
    eng_name: array of source ids; zero entries are dropped
    kor_name: array of target ids; zero entries are dropped
    """
    _, x_i2w, _, y_i2w = load_vocab()

    # Keep only the non-zero ids of each name.
    eng_ids = eng_name[np.nonzero(eng_name)]
    kor_ids = kor_name[np.nonzero(kor_name)]

    # NOTE(review): passing str arguments to split()/replace() right after
    # .encode('utf-8') only works where encode returns str (Python 2) --
    # confirm the target interpreter before porting.
    eng_text = " ".join(x_i2w[idx] for idx in eng_ids)
    eng_text = eng_text.encode('utf-8').split('E')[0]
    kor_text = " ".join(y_i2w[idx] for idx in kor_ids).encode('utf-8')
    kor_text = kor_text.replace('S', '').replace('E', '')

    # Crop the matrix to one less than the non-zero length on each axis.
    n_rows = eng_ids.shape[0] - 1
    n_cols = kor_ids.shape[0] - 1
    fig, ax = plt.subplots()
    im = ax.imshow(alignment[:n_rows, :n_cols], cmap='Greys')

    fig.colorbar(im)
    plt.title('{} epochs \n {} \n {}'.format(epoch, eng_text, kor_text))
    out_dir = hp.logdir + '/' + hp.modelname
    plt.savefig('{}/alignment_{}k.png'.format(out_dir, epoch), format='png')
    plt.close()
Пример #12
0
 def __init__(self, hp):
     """Store hp, load the vocabulary and build the embedding table.

     The token-to-index map and ``hp.embedding_file`` are passed on to
     ``get_token_embeddings`` together with the table dimensions.
     """
     self.hp = hp
     vocab_maps = load_vocab(hp.vocab)
     self.token2idx, self.idx2token = vocab_maps
     embedding_args = (
         self.hp.vocab_size,
         self.hp.d_model,
         self.token2idx,
         self.hp.embedding_file,
     )
     self.embeddings = get_token_embeddings(*embedding_args, zero_pad=True)
Пример #13
0
 def __init__(self, context):
     """Store the context, load its vocabulary and build embeddings.

     The vocabulary size is taken from the loaded token map and the
     embedding width from ``context.d_ff``.
     """
     self.context = context
     vocab_maps = load_vocab(context.vocab)
     self.token2idx, self.idx2token = vocab_maps
     # Actually, the d_model used here could be some other dimension.
     self.embeddings = get_token_embeddings(
         len(self.token2idx), self.context.d_ff, zero_pad=False)
Пример #14
0
 def __init__(self, hp):
     """Store hp, load the ``hp.vocab1`` vocabulary and build embeddings."""
     self.hp = hp
     # The wrong vocabulary was being used at prediction time! It must be
     # the target-language vocabulary, not the source-language one!!! That
     # cost me four days!!
     # It should also be the dev vocabulary rather than the train one!!
     # Then again, train is probably fine too, since train basically
     # covers dev; the dev vocabulary is smaller and raises KeyError.
     vocab_maps = load_vocab(hp.vocab1)
     self.token2idx, self.idx2token = vocab_maps
     self.embeddings = get_token_embeddings(
         self.hp.vocab_size, self.hp.d_model, zero_pad=True)
     print('embeddings size =', self.hp.vocab_size)
Пример #15
0
    def __init__(self, num=1, mode="train"):
        '''
        Builds the Text2Mel and SSRN sub-graphs on three placeholders.

        Args:
          num: Either 1 or 2. 1 for Text2Mel 2 for SSRN. (Not read anywhere
            in this constructor.)
          mode: Either "train" or "synthesize". Anything other than "train"
            switches the networks to non-training mode.
        '''
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()

        # Set flag
        training = (mode == "train")

        # Graph
        # Data Feeding
        ## L: Text. (B, N), int32
        ## mels: Reduced melspectrogram. (B, T/r, n_mels) float32
        ## mags: Magnitude. (B, T, n_fft//2+1) float32
        text_shape = (None, None)
        mel_shape = (None, None, hp.n_mels)
        self.L = tf.placeholder(tf.int32, shape=text_shape)
        self.mels = tf.placeholder(tf.float32, shape=mel_shape)
        self.prev_max_attentions = tf.placeholder(tf.int32, shape=(None, ))

        with tf.variable_scope("Text2Mel"):
            # Get S or decoder inputs. (B, T//r, n_mels)
            # A zero frame is prepended and the last frame dropped, i.e. the
            # mels shifted one step along the time axis.
            first_frame = self.mels[:, :1, :]
            zero_frame = tf.zeros_like(first_frame)
            shifted_mels = self.mels[:, :-1, :]
            self.S = tf.concat((zero_frame, shifted_mels), 1)

            # Networks
            with tf.variable_scope("TextEnc"):
                text_encoding = TextEnc(self.L, training=training)
                self.K, self.V = text_encoding  # (N, Tx, e)

            with tf.variable_scope("AudioEnc"):
                self.Q = AudioEnc(self.S, training=training)

            with tf.variable_scope("Attention"):
                # R: (B, T/r, 2d)
                # alignments: (B, N, T/r)
                # max_attentions: (B,)
                attention_out = Attention(
                    self.Q,
                    self.K,
                    self.V,
                    mononotic_attention=(not training),
                    prev_max_attentions=self.prev_max_attentions)
                self.R, self.alignments, self.max_attentions = attention_out

            with tf.variable_scope("AudioDec"):
                decoder_out = AudioDec(self.R, training=training)
                self.Y_logits, self.Y = decoder_out  # (B, T/r, n_mels)

        # During inference, the predicted melspectrogram values are fed.
        with tf.variable_scope("SSRN"):
            ssrn_out = SSRN(self.Y, training=training)
            self.Z_logits, self.Z = ssrn_out

        with tf.variable_scope("gs"):
            self.global_step = tf.Variable(
                0, name='global_step', trainable=False)
Пример #16
0
    def __init__(self, hp):
        """Build the full training and prediction graph for the model.

        Args:
            hp: Hyperparameter object. ``vocab``, ``vocab_size``,
                ``d_model``, ``lr`` and ``warmup_steps`` are read here.
        """
        self.hp = hp
        self.token2idx, self.idx2token = load_vocab(hp.vocab)
        self.embeddings = get_token_embeddings(self.hp.vocab_size,
                                               self.hp.d_model,
                                               zero_pad=True)

        # Inputs: three int32 id matrices with both dimensions left open,
        # plus a boolean training switch.
        self.input_x = tf.placeholder(dtype=tf.int32,
                                      shape=(None, None),
                                      name="input_x")
        self.decoder_input = tf.placeholder(dtype=tf.int32,
                                            shape=(None, None),
                                            name="decoder_input")
        self.target = tf.placeholder(dtype=tf.int32,
                                     shape=(None, None),
                                     name="target")
        self.is_training = tf.placeholder(dtype=tf.bool, name="is_training")

        # encoder
        self.encoder_hidden = self.encode(self.input_x,
                                          training=self.is_training)

        # decoder
        self.logits = self.decode(self.decoder_input,
                                  self.encoder_hidden,
                                  training=self.is_training)

        # Arg-max over the last axis of the logits, exported under the
        # name "y_predict_v2".
        self.y_hat = tf.to_int32(tf.argmax(self.logits, axis=-1),
                                 name="y_predict_v2")

        # loss: cross entropy against label-smoothed one-hot targets
        self.smoothing_y = label_smoothing(
            tf.one_hot(self.target, depth=self.hp.vocab_size))
        self.ce_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=self.logits, labels=self.smoothing_y)
        # 1.0 where the target is not the "<pad>" id, 0.0 elsewhere, so that
        # padded positions do not contribute to the loss.
        nonpadding = tf.to_float(
            tf.not_equal(self.target, self.token2idx["<pad>"]))
        # Mean over non-padding positions; the 1e-7 keeps the division
        # defined when the mask sums to zero.
        self.loss = tf.reduce_sum(
            self.ce_loss * nonpadding) / (tf.reduce_sum(nonpadding) + 1e-7)

        # optimize: Adam with the learning rate produced by noam_scheme
        self.global_step = tf.train.get_or_create_global_step()
        self.lr = noam_scheme(self.hp.lr, self.global_step,
                              self.hp.warmup_steps)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.minimize(self.loss,
                                           global_step=self.global_step)

        # tensorboard
        tf.summary.scalar('lr', self.lr)
        tf.summary.scalar("loss", self.loss)
        tf.summary.scalar("global_step", self.global_step)
        self.summaries = tf.summary.merge_all()

        # predict part: output of greedy_search exported as "y_predict"
        self.y_predict = tf.identity(self.greedy_search(), name="y_predict")
Пример #17
0
    def __init__(self, hp, inj_type=None, quant_min_max=None, inj_layer=None):
        """Store the configuration, load the vocabulary, build embeddings.

        Args:
            hp: Hyperparameter object; ``vocab``, ``vocab_size`` and
                ``d_model`` are read here.
            inj_type: Stored as given; not interpreted in this constructor.
            quant_min_max: Stored as given; not interpreted here.
            inj_layer: Stored as given; not interpreted here.
        """
        self.hp = hp
        # The three optional settings are only recorded on the instance.
        self.inj_type, self.quant_min_max, self.inj_layer = (
            inj_type, quant_min_max, inj_layer)

        vocab_maps = load_vocab(hp.vocab)
        self.token2idx, self.idx2token = vocab_maps
        self.embeddings = get_token_embeddings(
            self.hp.vocab_size, self.hp.d_model, zero_pad=True)
Пример #18
0
def create_qa_context(model_path: str, word_to_ix_path: str, embed_dim: int,
                      hidden_dim: int, device) -> QAContext:
    """Build a QAContext from a saved model and its vocabulary.

    Args:
        model_path: Path of the state dict to load with ``torch.load``.
        word_to_ix_path: Path handed to ``load_vocab``.
        embed_dim: Embedding size passed to the model constructor.
        hidden_dim: Hidden size passed to the model constructor.
        device: Forwarded unchanged to ``QAContext``.

    Returns:
        A ``QAContext`` wrapping the loaded model, the vocabulary and
        ``device``.
    """
    word_dict = load_vocab(word_to_ix_path)
    model = CNNBiLSTMAtt(len(word_dict), embed_dim, hidden_dim)

    # Without CUDA the stored tensors are remapped onto the CPU on load.
    if torch.cuda.is_available():
        state_dict = torch.load(model_path)
    else:
        state_dict = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state_dict)

    return QAContext(model, word_dict, device)
Пример #19
0
 def __init__(self, log_dir="log_dir", sample_dir="samples"):
     """Record the directories, load the vocabulary, build the graph.

     The graph is created in "synthesize" mode and ``load_session`` is
     called before the constructor returns.
     """
     # Set before load_session() runs at the end of the constructor.
     self._sess_loaded = False
     self.log_dir, self.sample_dir = log_dir, sample_dir
     self.char2idx, self.idx2char = load_vocab()
     self.g = Graph(mode="synthesize")
     print("Graph loaded")
     self.load_session()
Пример #20
0
def eval(mode):
    '''
    Get a Spearman rank-order correlation coefficient.

    Restores the latest checkpoint from `hp.logdir`, predicts every full
    batch of the chosen split and writes a tab-separated report (probe,
    expected intensity, predicted intensity) followed by the coefficient to
    `<savedir>/<model name>`. A trailing partial batch is skipped.

    Args:
      mode: A string. Either `val` or `test`. `val` writes to `hp.valdir`,
        anything else to `hp.testdir`.
    '''
    # Set save directory
    savedir = hp.valdir if mode == "val" else hp.testdir

    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, Y = load_data(mode=mode)
    nucl2idx, idx2nucl = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name: first double-quoted token of the checkpoint
            # index. The context manager closes the handle right away.
            with open(hp.logdir + '/checkpoint', 'r') as ckpt_file:
                mname = ckpt_file.read().split('"')[1]  # model name

            # Inference
            if not os.path.exists(savedir): os.mkdir(savedir)
            with open("{}/{}".format(savedir, mname), 'w') as fout:
                # Header row. The original format string ended in a stray
                # "]" that leaked into the last column name.
                fout.write("{}\t{}\t{}\n".format("probe",
                                                 "expected intensity",
                                                 "predicted intensity"))
                expected, got = [], []
                for step in range(len(X) // hp.batch_size):
                    x = X[step * hp.batch_size:(step + 1) * hp.batch_size]
                    y = Y[step * hp.batch_size:(step + 1) * hp.batch_size]

                    # predict nucl
                    logits = sess.run(g.logits, {g.x: x})

                    for xx, yy, ll in zip(x, y, logits):  # sequence-wise
                        fout.write("{}\t{}\t{}\n".format(
                            "".join(idx2nucl[idx] for idx in xx), yy, ll))
                        expected.append(yy)
                        got.append(ll)

                # Spearman rank coefficient
                score, _ = spearmanr(expected, got)
                fout.write("Spearman rank correlation coefficients: " +
                           str(score))
def eval():
    """Decode the test names and write all candidates to a result file.

    Restores the latest checkpoint under ``hp.logdir/hp.modelname``, runs
    the test data through the graph in steps of ``hp.batch_size`` and
    writes one tab-separated line per source name to
    ``./results/<modelname>_result.txt``: source, target, then every
    decoded candidate.
    """
    if not os.path.exists('./results'):
        os.makedirs('./results')

    # Load graph
    print("Graph loaded")
    print("Model name:{}".format(hp.modelname))
    # Load data
    print("Testing Data...")
    test_data = load_evaluate_data(eval_mode="test")
    txt_src_names, idx_src_names, txt_tgt_names, _ = test_data

    # Only the target index-to-symbol table is needed for decoding.
    _, _, _, y_i2w = load_vocab()

    g = Graph(is_training=False)
    with g.graph.as_default(), tf.Session() as sess:
        saver = tf.train.Saver()
        # Restore parameters
        print("Parameter Restoring...")
        ckpt_dir = hp.logdir + '/' + hp.modelname
        saver.restore(sess, tf.train.latest_checkpoint(ckpt_dir))
        # Inference
        count = 0
        result_path = './results/' + hp.modelname + '_result.txt'
        with open(result_path, "w") as fout:
            for start in range(0, len(txt_src_names), hp.batch_size):
                stop = start + hp.batch_size
                batch_sources = txt_src_names[start:stop]
                batch_source_ids = idx_src_names[start:stop]
                batch_targets = txt_tgt_names[start:stop]

                decoded = sess.run(g.pred_outputs, {g.x: batch_source_ids})
                batch_predicted_ids = decoded.predicted_ids[:, :, :]

                for source, target, predicted_ids in zip(
                        batch_sources, batch_targets, batch_predicted_ids):
                    print(
                        str(count) + '\t' + source + '\t' +
                        hangul.join_jamos(target))
                    count += 1

                    # After the transpose each row is one candidate; decode
                    # it up to the first "E" and compose the jamo.
                    candidates = [
                        hangul.join_jamos(
                            "".join(y_i2w[idx] for idx in pred).split("E")[0])
                        for pred in predicted_ids.transpose(1, 0)
                    ]

                    fout.write(source + '\t')
                    fout.write(hangul.join_jamos(target))
                    # NOTE(review): writing .encode('utf-8') output to a
                    # text-mode file only works where encode returns str
                    # (Python 2) -- confirm the target interpreter.
                    for candidate in candidates:
                        fout.write('\t')
                        fout.write(candidate.encode('utf-8'))
                    fout.write('\n')
                    fout.flush()
Пример #22
0
def main():
    """Evaluate the model on the test set and write a per-sentence CSV.

    Restores the latest checkpoint from ``hp.logdir``, predicts every full
    batch of the test data and writes ``eval/<model>_<layout>.csv`` with the
    expected text, the prediction, the number of expected characters and the
    Levenshtein distance, followed by a total character-error-rate row. A
    trailing partial batch is skipped.
    """
    g = Graph(is_training=False)

    # Load data
    nums, X, ys = load_test_data()
    pnyn2idx, idx2pnyn, hanzi2idx, idx2hanzi = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model name: first double-quoted token of the checkpoint
            # index. The context manager closes the handle right away.
            with open(hp.logdir + '/checkpoint', 'r') as ckpt_file:
                mname = ckpt_file.read().split('"')[1]  # model name

            # Keyboard layout tag, shared by the file name and the header.
            layout = "qwerty" if hp.isqwerty else "nine"

            with codecs.open('eval/{}_{}.csv'.format(mname, layout), 'w',
                             'utf-8') as fout:
                fout.write(
                    "NUM,EXPECTED,{}_{},# characters,edit distance\n".format(
                        mname, layout))

                total_edit_distance, num_chars = 0, 0
                for step in range(len(X) // hp.batch_size):
                    start = step * hp.batch_size
                    stop = start + hp.batch_size
                    num = nums[start:stop]  # number batch
                    x = X[start:stop]  # input batch
                    y = ys[start:stop]  # batch of ground truth strings

                    preds = sess.run(g.preds, {g.x: x})
                    for n, xx, pred, expected in zip(num, x, preds,
                                                     y):  # sentence-wise
                        # Cut the decoded text to the number of non-zero
                        # input ids, then drop the "_" placeholders.
                        decoded = "".join(idx2hanzi[idx] for idx in pred)
                        got = decoded[:np.count_nonzero(xx)].replace("_", "")

                        edit_distance = distance.levenshtein(expected, got)
                        total_edit_distance += edit_distance
                        num_chars += len(expected)

                        fout.write(u"{},{},{},{},{}\n".format(
                            n, expected, got, len(expected), edit_distance))

                # With no evaluated characters (e.g. fewer samples than one
                # batch) the rate is undefined; report NaN instead of
                # raising ZeroDivisionError.
                if num_chars:
                    cer = round(float(total_edit_distance) / num_chars, 2)
                else:
                    cer = float('nan')
                fout.write(u"Total CER: {}/{}={},,,,\n".format(
                    total_edit_distance, num_chars, cer))
Пример #23
0
    def Graph(self):
        """Build the tagging graph and return its main handles.

        The inputs come from ``get_batch_data`` when ``self.is_training`` is
        set and from placeholders otherwise. The network is embedding ->
        dropout -> multi-layer BiLSTM -> two feed-forward layers -> CRF or
        plain loss layer, selected by ``config.use_crf``.

        Returns:
            Tuple ``(graph, train_op, loss, acc, predicts, true_labels,
            global_step)``. ``train_op`` is ``None`` when not training.
        """
        graph = tf.Graph()
        with graph.as_default():
            if self.is_training:
                next_element, iterator, num_batch = get_batch_data(self.is_training)
                self.X, self.Y, self.seq_len = next_element["X"], next_element["Y"], next_element["seq_len"]

            else:
                self.X = tf.placeholder(tf.int32, shape=(None, config.maxlen))
                self.Y = tf.placeholder(tf.int32, shape=(None, config.maxlen))
                self.seq_len = tf.placeholder(tf.int32, shape=(None))
            idx2word, word2idx, idx2labl, labl2idx = load_vocab()
            embed = embedding(self.X, len(word2idx), config.embed_dim,
                              config.use_pretrain)

            if config.embeddig_mode == "concat":
                assert config.embed_dim == config.position_dim
                # TODO: the concat mode is not implemented beyond this check.

            elif config.embeddig_mode == "add":
                embed += position_encoding(self.X, config.position_dim,
                                           config.sinusoid)
            # input embedding Dropout
            embed = tf.layers.dropout(embed, rate=config.dropout_rate, training=self.is_training)
            # Multi-layer BiLSTM
            outputs = multibilstm(embed, self.seq_len, config.num_units, config.num_layer, self.is_training, config.cell)

            # Fully connected part: two feed-forward layers. Residual and
            # activation can be configured in feedforward itself.
            outputs = feedforward(outputs, outputs.get_shape().as_list()[2], scope="first")  # residual default used
            outputs = feedforward(outputs, config.num_class, residual=False, scope="second")
            noutput = tf.reshape(outputs, [-1, config.maxlen, config.num_class])

            # crf layer
            if config.use_crf:
                loss, acc, predicts, true_labels = crf_layer(self.Y, noutput, config.num_class, self.seq_len, self.is_training)
            else:
                loss, acc, predicts, true_labels = loss_layer(self.Y, noutput, config.num_class)
            tf.summary.scalar('acc', acc)
            # The step counter is bookkeeping, not a model parameter, so it
            # is kept out of the trainable-variables collection.
            global_step = tf.Variable(0, name='global_step', trainable=False)
            if self.is_training:
                # use exponential_decay to help the model fit quicker.
                # The original computed the decayed rate but still handed
                # config.lr to the optimizer, so the decay never took effect.
                learning_rate = config.lr
                if config.exponential_decay:
                    learning_rate = tf.train.exponential_decay(
                        config.lr, global_step, 200, 0.96, staircase=True
                    )
                optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
                train_op = optimizer.minimize(loss, global_step=global_step)
                tf.summary.scalar('mean_loss', loss)
            else:
                train_op = None
            return graph, train_op, loss, acc, predicts, true_labels, global_step
Пример #24
0
    def encode(self, x, training=True):
        '''
        Runs the encoder stack on the source ids and records the output of
        every stage.

        Args
        x: source token ids. (N, T1)
        training: forwarded to the dropout and attention layers.

        Returns
        memory: encoder outputs. (N, T1, d_model)
        src_masks: boolean tensor, True where x equals 0. (N, T1)
        outputs: list holding the embedding table, the embedded input and
            the output of every encoder block, in that order.
        scopes: list of the variable-scope names matching `outputs`.
        '''
        scopes = []
        outputs = []
        with tf.variable_scope("embeddings", reuse=tf.AUTO_REUSE):
            # Vocabulary and embedding table are (re)loaded on every call
            # and stored on the instance.
            self.token2idx, self.idx2token = load_vocab(self.hp.vocab)
            self.embeddings = get_token_embeddings(self.hp.vocab_size,
                                                   self.hp.d_model,
                                                   zero_pad=True)
            scopes.append(tf.get_variable_scope().name)
            outputs.append(self.embeddings)
        with tf.variable_scope("encoder_embedding_lookup",
                               reuse=tf.AUTO_REUSE):
            # src_masks
            src_masks = tf.math.equal(x, 0)  # (N, T1)

            # embedding
            enc = tf.nn.embedding_lookup(self.embeddings,
                                         x)  # (N, T1, d_model)
            enc *= self.hp.d_model**0.5  # scale

            enc += positional_encoding(enc, self.hp.maxlen1)
            enc = tf.layers.dropout(enc,
                                    self.hp.dropout_rate,
                                    training=training)
            scopes.append(tf.get_variable_scope().name)
            outputs.append(enc)
            ## Blocks
        for i in range(self.hp.num_blocks):
            # One scope per block, so every block's output is recorded
            # under its own name.
            with tf.variable_scope("encoder_num_blocks_{}".format(i),
                                   reuse=tf.AUTO_REUSE):
                # self-attention
                enc = multihead_attention(queries=enc,
                                          keys=enc,
                                          values=enc,
                                          key_masks=src_masks,
                                          num_heads=self.hp.num_heads,
                                          dropout_rate=self.hp.dropout_rate,
                                          training=training,
                                          causality=False)
                # feed forward
                enc = ff(enc, num_units=[self.hp.d_ff, self.hp.d_model])
                scopes.append(tf.get_variable_scope().name)
                outputs.append(enc)
        memory = enc
        return memory, src_masks, outputs, scopes
Пример #25
0
    def test_load_vocab(self):
        """Check both mappings returned by ``load_vocab('src')``.

        The first entries of the vocabulary must be the special tokens and
        words listed below, in this order, in the token-to-index map as well
        as in the index-to-token map.
        """
        test_vocab = [
            '<PAD>', '<UNK>', '<S>', '</S>', 'the', 'to', 'of', 'and', 'a'
        ]

        test2idx = {word: idx for idx, word in enumerate(test_vocab)}
        idx2test = dict(enumerate(test_vocab))

        word2idx, idx2word = load_vocab('src')

        for key, value in test2idx.items():
            self.assertEqual(value, word2idx[key])

        # Compare against the loaded idx2word. The original compared
        # idx2test with itself, so this loop could never fail.
        for key, value in idx2test.items():
            self.assertEqual(value, idx2word[key])
Пример #26
0
def main():
    """Train a ``CNNBiLSTMAtt`` model on the training split.

    Run-time options (epochs, batch size, learning rate, max sequence length,
    CUDA opt-out) come from ``get_args()``; model sizes and directories come
    from the project ``config`` module. The model is saved via ``save_model``
    after every epoch, tagged with the 1-based epoch number.
    """
    seq_path = f'{config.data_dir}/train/in.txt'
    tag_path = f'{config.data_dir}/train/out.txt'
    vocab_path = f'{config.data_dir}/vocabs'

    args = get_args()
    epochs, batch_size = args.epochs, args.batch_size
    lr, max_seq_len = args.lr, args.max_len

    embed_dim, hidden_dim = config.embed_dim, config.hidden_dim
    output_dir = config.ouput_dir

    # Use the GPU only when one is present and the user did not opt out.
    use_cuda = torch.cuda.is_available() and not args.no_cuda
    device = torch.device("cuda" if use_cuda else "cpu")

    logger.info("***** Loading vocab *****")
    vocab = load_vocab(vocab_path)
    vocab_size = len(vocab)

    logger.info("***** Initializing dataset *****")
    train_dataloader = init_dataset(seq_path, tag_path, vocab, max_seq_len,
                                    batch_size)

    logger.info("***** Training *****")
    model = CNNBiLSTMAtt(vocab_size, embed_dim, hidden_dim)
    model.to(device)
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # Positions labelled with the '[PAD]' id do not contribute to the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=vocab['[PAD]'])

    for epoch in range(epochs):
        logger.info(f"***** Epoch {epoch} *****")
        for step, batch in enumerate(train_dataloader):
            optimizer.zero_grad()
            seq_ids, exted_att_mask, tag_ids = (t.to(device) for t in batch)
            logits = model(seq_ids, exted_att_mask)
            # Flatten to (tokens, vocab) vs (tokens,) for the token-level loss.
            flat_logits = logits.view(-1, vocab_size)
            flat_tags = tag_ids.view(-1)
            loss = criterion(flat_logits, flat_tags)
            loss.backward()
            optimizer.step()
            if step % 100 != 0:
                continue
            logger.info(
                f"[epoch]: {epoch}, [batch]: {step}, [loss]: {loss.item()}"
            )
        save_model(model, output_dir, epoch + 1)
Пример #27
0
def evaluate():
    """Decode every evaluation utterance and report the error rate.

    Steps:
      1. Build the graph in ``"evaluate_las"`` mode and load the data.
      2. Zero-pad all mel spectrograms to the longest one.
      3. Restore the latest checkpoint from ``hp.las_logdir`` and decode
         each batch step by step for a fixed number of positions.
      4. Write file name, reference and hypothesis for each utterance to
         ``Inference_text_seqs.txt`` in ``hp.las_logdir``, then print and
         append the overall rate labelled ``cer``.

    The rate is the sum of ``wer`` over character lists divided by the total
    number of reference characters.
    """
    # Number of decoding steps / output positions per utterance.
    max_decode_len = 100

    # Load graph
    g = Graph(mode="evaluate_las")
    print("Graph loaded")

    # Load data
    _, idx2char = load_vocab()
    fpaths, _, texts = load_data(mode="evaluate_las")
    all_mel_spec = [load_pre_spectrograms(fpath)[1] for fpath in fpaths]
    maxlen = max(len(m) for m in all_mel_spec)
    # np.float64 is what the removed alias ``np.float`` used to resolve to.
    new_mel_spec = np.zeros((len(all_mel_spec), maxlen, hp.n_mels), np.float64)
    for i, m_spec in enumerate(all_mel_spec):
        new_mel_spec[i, :len(m_spec), :] = m_spec

    saver_las = tf.train.Saver(var_list=g.las_variable)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver_las.restore(sess, tf.train.latest_checkpoint(hp.las_logdir))
        total_y_hat = np.zeros((len(texts), max_decode_len), np.float32)
        # Batch boundaries; the last batch may be shorter than the others.
        batch_idx = list(range(0, len(texts), hp.las_inference_batch_size))
        batch_idx.append(len(texts))
        for i in tqdm.tqdm(range(len(batch_idx) - 1)):
            start, stop = batch_idx[i], batch_idx[i + 1]
            y_hat = total_y_hat[start:stop]
            mel_spec = new_mel_spec[start:stop]
            # Feed the predictions made so far back in and keep only the
            # newly predicted position j on each pass.
            for j in range(max_decode_len):
                _y_hat = sess.run(g.preds, {g.mels_las: mel_spec, g.texts_las: y_hat})
                y_hat[:, j] = _y_hat[:, j]
            total_y_hat[start:stop] = y_hat

    all_we = 0
    all_wrd = 0
    out_path = os.path.join(hp.las_logdir, "Inference_text_seqs.txt")
    # The context manager guarantees the report is flushed and closed.
    with open(out_path, "w") as opf:  # inference output
        for i, idx_inf in enumerate(total_y_hat):
            fname = os.path.basename(fpaths[i])

            str_gt = get_sent(idx2char, texts[i])
            str_inf = get_sent(idx2char, idx_inf)

            all_we += wer(list(str_inf), list(str_gt))
            all_wrd += len(str_gt)

            opf.write(fname + '\n' + str_gt + '\n' + str_inf + '\n' * 2)

        summary = 'cer: ' + str(all_we / all_wrd)
        print(summary)
        opf.write(summary)
Пример #28
0
def infer(hp):
    """Run an endless interactive prediction loop on a restored model.

    The graph is imported from the ``.meta`` file next to the checkpoint,
    the tensors ``input_x``, ``is_training`` and ``y_predict`` are looked up
    by name, and every line read from standard input is split into
    characters, mapped to ids, run through the model and logged as text up
    to the first ``</s>``.
    """
    load_hparams(hp, hp.ckpt)

    # Use the latest checkpoint found under hp.ckpt; if there is none,
    # treat hp.ckpt itself as the checkpoint path.
    ckpt = tf.train.latest_checkpoint(hp.ckpt) or hp.ckpt

    # load graph
    saver = tf.train.import_meta_graph(ckpt + '.meta', clear_devices=True)
    graph = tf.get_default_graph()

    # load tensors
    input_x, is_training, y_predict = (
        graph.get_tensor_by_name(tensor_name)
        for tensor_name in ("input_x:0", "is_training:0", "y_predict:0")
    )

    # vocabulary
    token2idx, idx2token = load_vocab(hp.vocab)

    logging.info("# Session")
    with tf.Session() as sess:
        saver.restore(sess, ckpt)

        while True:
            text = input("请输入测试样本:")

            # characters (plus end marker) -> ids, unknown ones -> <unk>
            unk_id = token2idx["<unk>"]
            x = [token2idx.get(tok, unk_id) for tok in list(text) + ["</s>"]]

            # run calculation on a batch of one
            feed = {input_x: [x], is_training: False}
            predict_result = sess.run(y_predict, feed_dict=feed)

            # ids -> tokens; ids missing from the vocabulary become "#"
            decoded = "".join(
                idx2token.get(t_id, "#") for t_id in predict_result[0])
            translation = decoded.split("</s>")[0]

            logging.info("  译文: {}".format(translation))

            time.sleep(0.1)
Пример #29
0
    def __init__(self):
        """Assemble the two-stage graph: Text2Mel followed by SSRN.

        Inputs are placeholders for the text ids (``L``), the mel frames
        generated so far (``mels``) and the previous maximum attention
        positions (``prev_max_attentions``).
        """
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()

        self.L = tf.placeholder(tf.int32, shape=(None, None))
        self.mels = tf.placeholder(tf.float32, shape=(None, None, n_mels))
        self.prev_max_attentions = tf.placeholder(tf.int32, shape=(None, ))

        # network 1
        with tf.variable_scope("Text2Mel"):
            # Decoder inputs S: the mels shifted right by one frame, with an
            # all-zero first frame. (B, T//r, n_mels)
            go_frame = tf.zeros_like(self.mels[:, :1, :])
            earlier_frames = self.mels[:, :-1, :]
            self.S = tf.concat((go_frame, earlier_frames), 1)

            # Networks
            with tf.variable_scope("TextEnc"):
                self.K, self.V = TextEnc(self.L)  # (N, Tx, e)

            with tf.variable_scope("AudioEnc"):
                self.Q = AudioEnc(self.S)

            with tf.variable_scope("Attention"):
                # R: (B, T/r, 2d)
                # alignments: (B, N, T/r)
                # max_attentions: (B,)
                attended = Attention(
                    self.Q,
                    self.K,
                    self.V,
                    mononotic_attention=True,
                    prev_max_attentions=self.prev_max_attentions)
                self.R, self.alignments, self.max_attentions = attended

            with tf.variable_scope("AudioDec"):
                self.Y_logits, self.Y = AudioDec(self.R)  # (B, T/r, n_mels)

        # network 2: SSRN consumes the mels predicted by network 1.
        with tf.variable_scope("SSRN"):
            self.Z_logits, self.Z = SSRN(self.Y)

        with tf.variable_scope("gs"):
            self.global_step = tf.Variable(
                0, name='global_step', trainable=False)
Пример #30
0
    def _net1(self):
        """Build network 1 on top of ``self.x_mfcc``.

        Returns:
            A tuple ``(ppgs, preds, logits)``: the softmax of the logits
            divided by ``hp.Train1.t``, the int32 argmax of the logits over
            the last axis, and the raw logits.
        """
        cfg = hp.Train1
        half_units = cfg.hidden_units // 2

        with tf.variable_scope('net1'):
            # Only the size of the phoneme vocabulary is needed here.
            phn2idx, _ = load_vocab()

            # Pre-net
            prenet_out = prenet(
                self.x_mfcc,
                num_units=[cfg.hidden_units, half_units],
                dropout_rate=cfg.dropout_rate,
                is_training=self.is_training)  # (N, T, E/2)

            # CBHG
            out = cbhg(prenet_out,
                       cfg.num_banks,
                       half_units,
                       cfg.num_highway_blocks,
                       cfg.norm_type,
                       self.is_training)

            # Final linear projection onto the phoneme classes
            logits = tf.layers.dense(out, len(phn2idx))  # (N, T, V)
            scaled_logits = logits / cfg.t
            ppgs = tf.nn.softmax(scaled_logits)  # (N, T, V)
            best_class = tf.arg_max(logits, dimension=-1)
            preds = tf.to_int32(best_class)  # (N, T)

        return ppgs, preds, logits
Пример #31
0
 def __init__(self, hp):
     """Keep the hyper-parameters, load the vocabulary and create embeddings.

     With ``hp.fac_embed`` set, a factorized pair ``embeddings1`` /
     ``embeddings2`` is created; otherwise a single ``embeddings`` table.
     """
     self.hp = hp
     vocab_path = os.path.join(hp.data_dir, hp.vocab)
     self.token2idx, self.idx2token = load_vocab(vocab_path)
     self.steps = []

     # Keyword options shared by both embedding variants.
     shared_opts = dict(zero_pad=True,
                        normalized=self.hp.norm_embedding,
                        ortho=self.hp.ortho_embedding)
     if not self.hp.fac_embed:
         self.embeddings = get_token_embeddings(self.hp.vocab_size,
                                                self.hp.d_model,
                                                **shared_opts)
     else:
         factor_pair = get_factorized_token_embeddings(self.hp.vocab_size,
                                                       self.hp.d_embed,
                                                       self.hp.d_model,
                                                       **shared_opts)
         self.embeddings1, self.embeddings2 = factor_pair
def create_train_data():
    """Encode the preprocessed sentence pairs and pickle them for training.

    Reads ``preprocessed/ja.tsv`` (three tab-separated fields per line:
    an index, the romaji sentence and the surface sentence), keeps pairs
    whose romaji sentence is shorter than ``hp.max_len``, converts each
    sentence character by character to int32 ids and stores the raw bytes
    of those arrays. The two lists are written as a tuple to
    ``preprocessed/train.pkl`` with pickle protocol 2.
    """
    from data_load import load_vocab
    roma2idx, _, surf2idx, _ = load_vocab()

    def encode(sent, table):
        # "S" is appended to every sentence (presumably the end-of-sentence
        # symbol - confirm against the vocabulary); characters missing from
        # the table fall back to id 1.
        ids = [table.get(ch, 1) for ch in sent + "S"]
        # tobytes() yields the same bytes as the deprecated tostring().
        return np.array(ids, np.int32).tobytes()

    romaji_sents, surface_sents = [], []
    # Context managers make sure both files are closed, even on errors.
    with codecs.open('preprocessed/ja.tsv', 'r', 'utf-8') as fin:
        for line in fin:
            try:
                _, romaji_sent, surface_sent = line.strip().split("\t")
            except ValueError:
                # Not exactly three fields: skip the malformed line.
                continue

            if len(romaji_sent) < hp.max_len:
                romaji_sents.append(encode(romaji_sent, roma2idx))
                surface_sents.append(encode(surface_sent, surf2idx))

    with open('preprocessed/train.pkl', 'wb') as fout:
        pickle.dump((romaji_sents, surface_sents), fout, protocol=2)
Пример #33
0
def main_batches():
    """Evaluate the restored model batch by batch and write a CSV report.

    For every full batch of the test data the predictions are converted to
    a string, truncated to the number of non-zero input ids and stripped of
    ``"_"``. One CSV row per sentence (number, expected, got, number of
    characters, edit distance) is written to ``eval/<model>_<layout>.csv``,
    followed by a final row with the total character error rate.

    NOTE: samples beyond the last full batch are not evaluated
    (``len(X) // hp.batch_size`` batches are processed).
    """
    g = Graph(is_training=False)

    # Load data
    nums, X, ys = load_test_data()
    _, _, _, idx2hanzi = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Model name: the first double-quoted value in the checkpoint
            # index file. Read it inside a context manager so the handle is
            # closed right away.
            with open(hp.logdir + '/checkpoint', 'r') as ckpt_file:
                mname = ckpt_file.read().split('"')[1]

            layout = "qwerty" if hp.isqwerty else "nine"
            with codecs.open('eval/{}_{}.csv'.format(mname, layout), 'w', 'utf-8') as fout:
                fout.write("NUM,EXPECTED,{}_{},# characters,edit distance\n".format(mname, layout))

                total_edit_distance, num_chars = 0, 0
                for step in range(len(X) // hp.batch_size):
                    batch = slice(step * hp.batch_size, (step + 1) * hp.batch_size)
                    num = nums[batch]  # number batch
                    x = X[batch]  # input batch
                    y = ys[batch]  # batch of ground truth strings

                    preds = sess.run(g.preds, {g.x: x})
                    for n, xx, pred, expected in zip(num, x, preds, y):  # sentence-wise
                        # Keep as many characters as there are non-zero
                        # input ids, then drop the "_" symbols.
                        got = "".join(idx2hanzi[idx] for idx in pred)[:np.count_nonzero(xx)].replace("_", "")
                        edit_distance = distance.levenshtein(expected, got)
                        total_edit_distance += edit_distance
                        num_chars += len(expected)

                        fout.write(u"{},{},{},{},{}\n".format(n, expected, got, len(expected), edit_distance))

                # With no scored characters (e.g. fewer samples than one
                # batch) the rate is undefined; report NaN instead of
                # raising ZeroDivisionError.
                if num_chars:
                    cer = round(float(total_edit_distance) / num_chars, 2)
                else:
                    cer = float("nan")
                fout.write(u"Total CER: {}/{}={},,,,\n".format(total_edit_distance,
                                                        num_chars,
                                                        cer))
Пример #34
0
    def __init__(self, training=True):
        """Build the encoder / decoder / converter graph.

        Args:
            training: When true, inputs come from ``get_batch()`` and the
                losses, the clipped-gradient train op and the summaries are
                created. When false, ``x``, ``y1`` and
                ``prev_max_attentions_li`` are fixed-shape placeholders and
                only the forward pass is built (``y2``, ``z`` and the loss
                attributes are not set).
        """
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()

        # Graph
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Data Feeding
            ## x: Text. (N, Tx), int32
            ## y1: Reduced melspectrogram. (N, Ty//r, n_mels*r) float32
            ## y2: Reduced dones. (N, Ty//r,) int32
            ## z: Magnitude. (N, Ty, n_fft//2+1) float32
            if training:
                self.x, self.y1, self.y2, self.z, self.num_batch = get_batch()
                # During training the previous max attentions are a constant tensor of ones.
                self.prev_max_attentions_li = tf.ones(shape=(hp.dec_layers, hp.batch_size), dtype=tf.int32)
            else: # Inference
                self.x = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.Tx))
                self.y1 = tf.placeholder(tf.float32, shape=(hp.batch_size, hp.Ty//hp.r, hp.n_mels*hp.r))
                self.prev_max_attentions_li = tf.placeholder(tf.int32, shape=(hp.dec_layers, hp.batch_size,))

            # Get decoder inputs: feed last frames only (N, Ty//r, n_mels)
            # y1 is shifted right by one step along axis 1, with an all-zero first step.
            self.decoder_input = tf.concat((tf.zeros_like(self.y1[:, :1, -hp.n_mels:]), self.y1[:, :-1, -hp.n_mels:]), 1)

            # Networks
            with tf.variable_scope("encoder"):
                self.keys, self.vals = encoder(self.x, training=training) # (N, Tx, e)

            with tf.variable_scope("decoder"):
                # mel_logits: (N, Ty/r, n_mels*r)
                # done_output: (N, Ty/r, 2),
                # decoder_output: (N, Ty/r, e)
                # alignments_li: dec_layers*(Tx, Ty/r)
                # max_attentions_li: dec_layers*(N, T_y/r)
                self.mel_logits, self.done_output, self.decoder_output, self.alignments_li, self.max_attentions_li \
                    = decoder(self.decoder_input,
                             self.keys,
                             self.vals,
                             self.prev_max_attentions_li,
                             training=training)
                self.mel_output = tf.nn.sigmoid(self.mel_logits)

            with tf.variable_scope("converter"):
                # Restore shape
                self.converter_input = tf.reshape(self.decoder_output, (-1, hp.Ty, hp.embed_size//hp.r))
                self.converter_input = fc_block(self.converter_input,
                                                hp.converter_channels,
                                                activation_fn=tf.nn.relu,
                                                training=training) # (N, Ty, v)

                # Converter
                self.mag_logits = converter(self.converter_input, training=training) # (N, Ty, 1+n_fft//2)
                self.mag_output = tf.nn.sigmoid(self.mag_logits)

            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            if training:
                # Loss
                # Mean absolute error for mels and magnitudes, cross entropy for the done flags.
                self.loss_mels = tf.reduce_mean(tf.abs(self.mel_output - self.y1))
                self.loss_dones = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.done_output, labels=self.y2))
                self.loss_mags = tf.reduce_mean(tf.abs(self.mag_output - self.z))
                self.loss = self.loss_mels + self.loss_dones + self.loss_mags

                # Training Scheme
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
                ## gradient clipping
                # Each gradient is first clipped element-wise to
                # [-max_grad_val, max_grad_val] and then by its norm.
                self.gvs = self.optimizer.compute_gradients(self.loss)
                self.clipped = []
                for grad, var in self.gvs:
                    grad = tf.clip_by_value(grad, -1. * hp.max_grad_val, hp.max_grad_val)
                    grad = tf.clip_by_norm(grad, hp.max_grad_norm)
                    self.clipped.append((grad, var))
                self.train_op = self.optimizer.apply_gradients(self.clipped, global_step=self.global_step)

                # Summary
                tf.summary.scalar('Train_Loss/LOSS', self.loss)
                tf.summary.scalar('Train_Loss/mels', self.loss_mels)
                tf.summary.scalar('Train_Loss/dones', self.loss_dones)
                tf.summary.scalar('Train_Loss/mags', self.loss_mags)

                self.merged = tf.summary.merge_all()
Пример #35
0
    def __init__(self, mode="train"):
        """Build the model graph for the given ``mode``.

        Args:
            mode: ``"train"`` reads inputs from ``get_batch()``; ``"eval"``
                uses placeholders for text, mels, magnitudes and file names;
                any other value (synthesis) uses placeholders for text and
                mels only. Loss, optimizer and summaries are created for
                ``"train"`` and ``"eval"``.
        """
        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()

        # The networks run in training phase only in "train" mode.
        is_training = mode == "train"

        # Data feeding
        # x: Text. (N, Tx)
        # y: Reduced melspectrogram. (N, Ty//r, n_mels*r)
        # z: Magnitude. (N, Ty, n_fft//2+1)
        if mode == "train":
            self.x, self.y, self.z, self.fnames, self.num_batch = get_batch()
        else:
            # "eval" and synthesis both feed text and mels by hand ...
            self.x = tf.placeholder(tf.int32, shape=(None, None))
            self.y = tf.placeholder(tf.float32, shape=(None, None, hp.n_mels*hp.r))
            if mode == "eval":
                # ... and "eval" additionally needs the targets of the loss.
                self.z = tf.placeholder(tf.float32, shape=(None, None, 1+hp.n_fft//2))
                self.fnames = tf.placeholder(tf.string, shape=(None,))

        # Encoder inputs: embedded text. (N, T_x, E)
        self.encoder_inputs = embed(self.x, len(hp.vocab), hp.embed_size)

        # Decoder inputs: y shifted right by one step with a zero first
        # step (N, Ty/r, n_mels*r), then only the last n_mels features of
        # every step are kept. (N, Ty/r, n_mels)
        go_frame = tf.zeros_like(self.y[:, :1, :])
        shifted = tf.concat((go_frame, self.y[:, :-1, :]), 1)
        self.decoder_inputs = shifted[:, :, -hp.n_mels:]

        # Networks
        with tf.variable_scope("net"):
            # Encoder
            self.memory = encoder(self.encoder_inputs, is_training=is_training) # (N, T_x, E)

            # Decoder1
            decoded = decoder1(self.decoder_inputs,
                               self.memory,
                               is_training=is_training) # (N, T_y//r, n_mels*r)
            self.y_hat, self.alignments = decoded

            # Decoder2 or postprocessing
            self.z_hat = decoder2(self.y_hat, is_training=is_training) # (N, T_y//r, (1+n_fft//2)*r)

        # monitor: waveform of the first predicted magnitude spectrogram
        self.audio = tf.py_func(spectrogram2wav, [self.z_hat[0]], tf.float32)

        if mode not in ("train", "eval"):
            # Synthesis needs nothing beyond the forward pass.
            return

        # Loss: mean absolute error on mels plus on magnitudes
        self.loss1 = tf.reduce_mean(tf.abs(self.y_hat - self.y))
        self.loss2 = tf.reduce_mean(tf.abs(self.z_hat - self.z))
        self.loss = self.loss1 + self.loss2

        # Training Scheme
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.lr = learning_rate_decay(hp.lr, global_step=self.global_step)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)

        ## gradient clipping: every gradient is clipped to norm 5
        self.gvs = self.optimizer.compute_gradients(self.loss)
        self.clipped = [(tf.clip_by_norm(grad, 5.), var)
                        for grad, var in self.gvs]
        self.train_op = self.optimizer.apply_gradients(self.clipped, global_step=self.global_step)

        # Summary
        scalar_summaries = (
            ('{}/loss1', self.loss1),
            ('{}/loss', self.loss),
            ('{}/lr', self.lr),
        )
        for tag, value in scalar_summaries:
            tf.summary.scalar(tag.format(mode), value)

        image_summaries = (
            ("{}/mel_gt", self.y),
            ("{}/mel_hat", self.y_hat),
            ("{}/mag_gt", self.z),
            ("{}/mag_hat", self.z_hat),
        )
        for tag, spec in image_summaries:
            # A trailing axis of size 1 is added before logging as an image.
            tf.summary.image(tag.format(mode), tf.expand_dims(spec, -1), max_outputs=1)

        tf.summary.audio("{}/sample".format(mode), tf.expand_dims(self.audio, 0), hp.sr)
        self.merged = tf.summary.merge_all()
Пример #36
0
    def __init__(self, is_training=True):
        """Build the sequence labelling graph (embedding, CBHG encoder, readout).

        Args:
            is_training: When true, inputs come from ``get_batch()`` and the
                masked loss, accuracy, train op and summaries are created.
                Otherwise ``x`` and ``y`` are int32 placeholders of shape
                ``(None, hp.maxlen)`` and only ``outputs`` / ``preds`` exist.
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            if not is_training:  # Evaluation
                self.x = tf.placeholder(tf.int32, shape=(None, hp.maxlen,))
                self.y = tf.placeholder(tf.int32, shape=(None, hp.maxlen,))
            else:
                self.x, self.y, self.num_batch = get_batch()

            # Only the vocabulary sizes are needed for building the graph.
            pnyn2idx, _, hanzi2idx, _ = load_vocab()
            half_size = hp.embed_size // 2

            # Character Embedding for x
            embedded = embed(self.x, len(pnyn2idx), hp.embed_size, scope="emb_x")

            # Encoder pre-net
            prenet_out = prenet(embedded,
                                num_units=[hp.embed_size, half_size],
                                is_training=is_training)  # (N, T, E/2)

            # Encoder CBHG
            ## Conv1D bank
            banks = conv1d_banks(prenet_out,
                                 K=hp.encoder_num_banks,
                                 num_units=half_size,
                                 is_training=is_training)  # (N, T, K * E / 2)

            ## Max pooling
            pooled = tf.layers.max_pooling1d(banks, 2, 1, padding="same")  # (N, T, K * E / 2)

            ## Conv1D projections
            projected = conv1d(pooled, half_size, 5, scope="conv1d_1")  # (N, T, E/2)
            projected = normalize(projected, type=hp.norm_type, is_training=is_training,
                                  activation_fn=tf.nn.relu, scope="norm1")
            projected = conv1d(projected, half_size, 5, scope="conv1d_2")  # (N, T, E/2)
            projected = normalize(projected, type=hp.norm_type, is_training=is_training,
                                  activation_fn=None, scope="norm2")

            ## Residual connection back to the pre-net output
            hidden = projected + prenet_out  # (N, T, E/2)

            ## Highway Nets
            for block_no in range(hp.num_highwaynet_blocks):
                hidden = highwaynet(hidden, num_units=half_size,
                                    scope='highwaynet_{}'.format(block_no))  # (N, T, E/2)

            ## Bidirectional GRU
            encoded = gru(hidden, half_size, True, scope="gru1")  # (N, T, E)

            ## Readout
            self.outputs = tf.layers.dense(encoded, len(hanzi2idx), use_bias=False)
            self.preds = tf.to_int32(tf.arg_max(self.outputs, dimension=-1))

            if not is_training:
                return

            self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y, logits=self.outputs)
            # Positions whose label is 0 are masked out of accuracy and loss.
            self.istarget = tf.to_float(tf.not_equal(self.y, tf.zeros_like(self.y)))  # masking
            self.hits = tf.to_float(tf.equal(self.preds, self.y)) * self.istarget
            self.acc = tf.reduce_sum(self.hits) / tf.reduce_sum(self.istarget)
            self.mean_loss = tf.reduce_sum(self.loss * self.istarget) / tf.reduce_sum(self.istarget)

            # Training Scheme
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr)
            self.train_op = self.optimizer.minimize(self.mean_loss, global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss', self.mean_loss)
            tf.summary.scalar('acc', self.acc)
            self.merged = tf.summary.merge_all()