Exemplo n.º 1
0
from model_cgc import model
# Checkpoint name; it is formatted into the path "../ckpt/{}" when the
# saver restores the model below.
ckpt = "cgc_36"
# NOTE(review): the meaning of this value is not visible in this excerpt;
# presumably the inference mode handed on to translate/infer — confirm.
mode = 3

from infer import infer, trim_str
from itertools import islice
from util_cw import CharWright
from util_io import load_txt, save_txt
from util_np import np, partition
from util_tf import tf
# Session used for restoring the checkpoint and, later, for inference.
sess = tf.InteractiveSession()

# load model
# One CharWright per side: cws for the source text, cwt for the target text.
cws = CharWright.load("../data/cws.pkl")
cwt = CharWright.load("../data/cwt.pkl")
# Build the model in 'infer' mode; the dwh() results of both CharWrights are
# passed through as-is (their meaning is not visible here — see util_cw).
m = model('infer', cws.dwh(), cwt.dwh())
# NOTE(review): the saver is created after the model, presumably so that the
# model's variables already exist when it is constructed — confirm.
saver = tf.train.Saver()
saver.restore(sess, "../ckpt/{}".format(ckpt))

# the first 4096 instances are used for validation
src = np.array(list(islice(load_txt("../data/src.txt"), 4096)))
tgt = np.array(list(islice(load_txt("../data/tgt.txt"), 4096)))
# Indices ordering the pairs by source length, shortest first; sorted() is
# stable, so pairs of equal length keep their file order.
val = np.array(sorted(range(len(src)), key=lambda i: len(src[i])))
# Apply the same permutation to both sides so the pairs stay aligned.
src = src[val]
tgt = tgt[val]


def translate(src, mode):
    """Run inference over ``src`` in consecutive slices.

    NOTE(review): only the beginning of this function is visible in this
    excerpt; what happens to each slice's ``pred`` and ``pidx`` afterwards
    cannot be documented from here.

    Args:
        src: sliceable collection of source sentences (used as ``src[i:j]``).
        mode: forwarded unchanged as the first argument of ``infer``.
    """
    # NOTE(review): partition presumably yields (start, stop) bounds for
    # chunks of at most 256 items — confirm in util_np.
    for i, j in partition(len(src), 256):
        # Encode the slice with the source CharWright; per the keyword names,
        # only the index encoding (not the image one) is requested.
        src_idx, len_src = cws(src[i:j], ret_img=False, ret_idx=True)
        # Uses the module-level model, session and target CharWright.
        pred, pidx = infer(mode, m, sess, cwt, src_idx, len_src)
Exemplo n.º 2
0
#############
# load data #
#############

# Pair up the lines of both files, strip surrounding whitespace, and keep
# only the pairs in which both sides have between 3 and max_char characters
# (inclusive).
src_tgt = [
    pair
    for pair in (
        (line_src.strip(), line_tgt.strip())
        for line_src, line_tgt in zip(load_txt(path_src), load_txt(path_tgt))
    )
    if all(3 <= len(text) <= max_char for text in pair)
]

# Fixed seed, so the shuffled order is the same on every run.
np.random.seed(0)
np.random.shuffle(src_tgt)

# Transpose the list of pairs into two parallel tuples, then drop the list.
src, tgt = zip(*src_tgt)
del src_tgt

#############
# save data #
#############

# Build one CharWright per side from chars(...) of that side's sentences.
# NOTE(review): chars presumably collects the character inventory of the
# given sentences — confirm against its definition.
cws = CharWright.new(chars(src))
cwt = CharWright.new(chars(tgt))

# Persist both CharWright objects.
cws.save("../data/cws.pkl")
cwt.save("../data/cwt.pkl")

# Write out the filtered, shuffled sentences. src and tgt are parallel
# tuples, so entry i of one file pairs with entry i of the other
# (assuming save_txt writes one entry per line — confirm).
save_txt("../data/src.txt", src)
save_txt("../data/tgt.txt", tgt)