# Inference script: restores a trained checkpoint and prepares the
# validation subset of the parallel data for translation.
from model_cgc import model

# name of the checkpoint restored from ../ckpt/ below
ckpt = "cgc_36"
# forwarded to `infer` as its first argument
# NOTE(review): the meaning of the value 3 is defined in `infer`, not visible here
mode = 3

from infer import infer, trim_str
from itertools import islice
from util_cw import CharWright
from util_io import load_txt, save_txt
from util_np import np, partition
from util_tf import tf

sess = tf.InteractiveSession()

# load model
# cws / cwt are the pickled CharWright objects for the source and target side;
# their `dwh()` results parameterize the model
# NOTE(review): presumably dimension/width/height -- confirm in util_cw
cws = CharWright.load("../data/cws.pkl")
cwt = CharWright.load("../data/cwt.pkl")
m = model('infer', cws.dwh(), cwt.dwh())
saver = tf.train.Saver()
saver.restore(sess, "../ckpt/{}".format(ckpt))

# the first 4096 instances are used for validation
src = np.array(list(islice(load_txt("../data/src.txt"), 4096)))
tgt = np.array(list(islice(load_txt("../data/tgt.txt"), 4096)))
# indices that order the instances by ascending source length (sorted() is
# stable, so equal lengths keep their file order); applied to both sides so
# that the pairs stay aligned
val = np.array(sorted(range(len(src)), key=lambda i: len(src[i])))
src = src[val]
tgt = tgt[val]

def translate(src, mode):
    """Run inference over `src` in slices taken from `partition(len(src), 256)`.

    Note that the parameters `src` and `mode` shadow the module-level names.

    NOTE(review): the visible part of this function ends after the `infer`
    call; how `pred` and `pidx` are consumed or returned is not shown here.
    """
    # NOTE(review): presumably `partition` yields (start, stop) index pairs
    # covering at most 256 items each -- confirm in util_np
    for i, j in partition(len(src), 256):
        # encode the slice with the source CharWright, requesting indices only
        src_idx, len_src = cws(src[i:j], ret_img=False, ret_idx=True)
        pred, pidx = infer(mode, m, sess, cwt, src_idx, len_src)
#############
# load data #
#############

# Pair the two files line by line, strip surrounding whitespace, and keep
# only the pairs where both sides have between 3 and `max_char` characters.
src_tgt = [
    (s, t)
    for s, t in (
        (raw_s.strip(), raw_t.strip())
        for raw_s, raw_t in zip(load_txt(path_src), load_txt(path_tgt))
    )
    if all(3 <= len(text) <= max_char for text in (s, t))
]

# Fixed seed so the shuffled order is the same on every run.
np.random.seed(0)
np.random.shuffle(src_tgt)

# Split the shuffled pairs back into two aligned tuples and drop the
# pair list, which is no longer needed.
src, tgt = zip(*src_tgt)
del src_tgt

#############
# save data #
#############

# One CharWright per side, built from the characters of the kept text.
cws = CharWright.new(chars(src))
cwt = CharWright.new(chars(tgt))

cws.save("../data/cws.pkl")
cwt.save("../data/cwt.pkl")

# Write out the filtered and shuffled text, still aligned line by line.
save_txt("../data/src.txt", src)
save_txt("../data/tgt.txt", tgt)