def evaluate(batch_idx, epoch):
    """Decode ``valid_iter`` with beam search and record the BLEU scores.

    Puts ``model`` in eval mode (it is NOT switched back here), decodes every
    validation batch, computes a smoothed ``corpus_bleu`` score and a second
    score via ``bleu_script``, prints both, and appends
    ``(bleu2, batch_idx, epoch)`` to ``opt.score_list``.

    Args:
        batch_idx: Training batch index; only used to label the report and
            the ``opt.score_list`` entry.
        epoch: Training epoch; used the same way as ``batch_idx``.
    """
    import os  # local import: needed only for temp-file handling below

    model.eval()
    hyp_list = []
    ref_list = []
    start_time = time.time()
    for batch in valid_iter:
        src_raw = batch[0]
        trg_raw = batch[1:]
        src, src_mask = convert_data(src_raw, src_vocab, device, True,
                                     UNK, PAD, SOS, EOS)
        with torch.no_grad():
            output = model.beamsearch(src, src_mask, opt.beam_size,
                                      normalize=True)
        # Only the first hypothesis is kept.
        # NOTE(review): this, and x[0] below, presumably rely on the
        # validation iterator yielding one sentence per batch - confirm.
        best_hyp, best_score = output[0]
        best_hyp = convert_str([best_hyp], trg_vocab)
        hyp_list.append(best_hyp[0])
        # Materialise as a list: under Python 3 ``map`` is a one-shot
        # iterator, while the BLEU computation needs re-iterable references.
        ref_list.append([x[0] for x in trg_raw])
    elapsed = time.time() - start_time

    bleu1 = corpus_bleu(ref_list, hyp_list,
                        smoothing_function=SmoothingFunction().method1)

    # mkstemp creates the file atomically (mktemp only returns a name and is
    # race-prone); the file is removed once the external scorer has read it.
    fd, p_tmp = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'w') as f_tmp:
            f_tmp.write('\n'.join(' '.join(hyp) for hyp in hyp_list))
        bleu2 = bleu_script(p_tmp)
    finally:
        os.remove(p_tmp)

    print('BLEU score for {}-{} is {}/{}, {}'.format(epoch, batch_idx, bleu1,
                                                     bleu2, elapsed))
    opt.score_list.append((bleu2, batch_idx, epoch))
def decode(args):
    """Translate sentences read from stdin, one per line, onto stdout.

    Loads the model named by ``args.model``, rebuilds it inside a TensorFlow
    session and runs beam search on each input line. The best hypothesis
    (minus its last token) is written to stdout; a running line counter, the
    hypothesis score and the search time are written to stderr.

    Args:
        args: Parsed command-line namespace; reads ``model``, ``gpuid``,
            ``maxlen``, ``minlen``, ``beamsize`` and ``normalize``.
    """
    option, values = load_model(args.model)

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    if args.gpuid >= 0:
        session_config.gpu_options.visible_device_list = "%d" % args.gpuid

    source_vocabs, target_vocabs = option["vocabulary"]
    svocab, _isvocab = source_vocabs
    _tvocab, _itvocab = target_vocabs
    unk_sym = option["unk"]
    eos_sym = option["eos"]

    doption = {
        "maxlen": args.maxlen,
        "minlen": args.minlen,
        "beamsize": args.beamsize,
        "normalize": args.normalize,
    }

    with tf.Session(config=session_config):
        model = rnnsearch(**option)
        model.option = option
        tf.global_variables_initializer().run()
        set_variables(tf.trainable_variables(), values)

        # readline() returns "" only at end of input, which ends the loop.
        for count, line in enumerate(iter(sys.stdin.readline, ""), start=1):
            seq, _seq_len = convert_data([line], svocab, unk_sym, eos_sym)

            started = time.time()
            tlist = beamsearch(model, seq, **doption)
            finished = time.time()

            if len(tlist) > 0:
                best, score = tlist[0]
                # best[:-1] leaves out the final token of the hypothesis.
                sys.stdout.write(" ".join(best[:-1]) + "\n")
            else:
                # No hypothesis: keep output aligned with a blank line and
                # report a placeholder score.
                sys.stdout.write("\n")
                score = -10000.0

            report = [str(count), str(score), str(finished - started)]
            sys.stderr.write(" ".join(report) + "\n")
def sample_fn(*args, **kwargs):
    """Print one random sentence pair from a batch with its translation.

    ``args[0]`` is the batch: ``args[0][0]`` holds the source sentences and
    ``args[0][1]`` the target sentences. One index is drawn at random, the
    matching source row is decoded with beam search, and the source, the
    reference and the best hypothesis (minus its last token) are printed,
    each prefixed with ``"> "``. Extra positional and keyword arguments are
    accepted and ignored.
    """
    data = args[0]
    batch = len(data[0])
    if batch == 0:
        # np.random.randint(0, 0) raises ValueError; nothing to sample.
        return

    ind = np.random.randint(0, batch)
    sdata = data[0][ind]
    tdata = data[1][ind]

    # Convert the whole source side, then slice out the sampled row so the
    # search still receives batch-shaped inputs.
    xdata, _, xlen = convert_data(data[0], svocab, unk, eos)
    xdata = xdata[ind:ind + 1, :]
    xlen = xlen[ind:ind + 1]

    hls = beamsearch(model, xdata, xlen, **search_opt)
    # Other callers of beamsearch in this file treat an empty result as
    # possible; print an empty translation rather than abort with IndexError.
    best = hls[0][0] if len(hls) > 0 else []

    print("> " + sdata)
    print("> " + tdata)
    print("> " + " ".join(best[:-1]))
def evaluate(batch_idx, epoch):
    """Decode ``valid_iter`` with beam search and log a table of BLEU scores.

    Puts ``model`` in eval mode (it is NOT switched back here), decodes every
    validation batch, scores the hypotheses with ``bleu_script`` and with
    ``bleu`` for n = 1..4, writes all five values to ``writer`` with ``epoch``
    as the step, logs them as an ASCII table, and appends
    ``(bleu2, bleu_1_gram, ..., bleu_4_gram, batch_idx, epoch)`` to
    ``opt.score_list``.

    Args:
        batch_idx: Training batch index; only used in the log line and the
            ``opt.score_list`` entry.
        epoch: Training epoch; also used as the summary-writer step.
    """
    import os  # local import: needed only for temp-file handling below

    model.eval()
    hyp_list = []
    ref_list = []
    for batch in valid_iter:
        src_raw = batch[0]
        trg_raw = batch[1:]
        src, src_mask = convert_data(
            src_raw, src_vocab, device, True, UNK, PAD, SOS, EOS
        )
        with torch.no_grad():
            output = model.beamsearch(src, src_mask, opt.beam_size, normalize=True)
        # Only the first hypothesis is kept.
        # NOTE(review): this, and x[0] below, presumably rely on the
        # validation iterator yielding one sentence per batch - confirm.
        best_hyp, best_score = output[0]
        best_hyp = convert_str([best_hyp], trg_vocab)
        hyp_list.append(best_hyp[0])
        ref_list.append([x[0] for x in trg_raw])

    # From here on each hypothesis is a space-joined string; this same list
    # is written to disk and handed to ``bleu`` below.
    hyp_list = [" ".join(x) for x in hyp_list]

    # mkstemp creates the file atomically (mktemp only returns a name and is
    # race-prone); the file is removed once the external scorer has read it.
    fd, p_tmp = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as f_tmp:
            f_tmp.write("\n".join(hyp_list))
        bleu2 = bleu_script(p_tmp)
    finally:
        os.remove(p_tmp)

    bleu_1_gram, bleu_2_gram, bleu_3_gram, bleu_4_gram = [
        bleu(hyp_list, ref_list, smoothing=True, n=order) for order in range(1, 5)
    ]

    writer.add_scalar("./bleu_1_gram", bleu_1_gram, epoch)
    writer.add_scalar("./bleu_2_gram", bleu_2_gram, epoch)
    writer.add_scalar("./bleu_3_gram", bleu_3_gram, epoch)
    writer.add_scalar("./bleu_4_gram", bleu_4_gram, epoch)
    writer.add_scalar("./multi-bleu", bleu2, epoch)

    bleu_result = [
        ["multi-bleu", "bleu_1-gram", "bleu_2-gram", "bleu_3-gram", "bleu_4-gram"],
        [bleu2, bleu_1_gram, bleu_2_gram, bleu_3_gram, bleu_4_gram],
    ]
    bleu_table = AsciiTable(bleu_result)
    logger.info(
        "BLEU score for Epoch-{}-batch-{}: ".format(epoch, batch_idx)
        + "\n"
        + bleu_table.table
    )
    opt.score_list.append(
        (bleu2, bleu_1_gram, bleu_2_gram, bleu_3_gram, bleu_4_gram, batch_idx, epoch)
    )
def translate(model, corpus, **opt):
    """Translate every line of a text file with beam search.

    Args:
        model: Model whose ``option`` dict provides ``"vocabulary"`` (the
            source vocabulary is read from ``[0][0]``), ``"unk"`` and
            ``"eos"``.
        corpus: Path of the text file to translate, one sentence per line.
        **opt: Keyword arguments forwarded unchanged to ``beamsearch``.

    Returns:
        A list with one entry per input line: the best hypothesis without its
        last token, or ``[]`` when the search produced no hypothesis.
    """
    svocab = model.option["vocabulary"][0][0]
    unk_symbol = model.option["unk"]
    eos_symbol = model.option["eos"]

    trans = []
    # ``with`` guarantees the corpus handle is closed even if decoding raises.
    with open(corpus, "r") as fd:
        for line in fd:
            line = line.strip()
            data, _ = convert_data([line], svocab, unk_symbol, eos_symbol)
            hls = beamsearch(model, data, **opt)
            if len(hls) > 0:
                best = hls[0][0]
                trans.append(best[:-1])
            else:
                # Keep output aligned with input when nothing was produced.
                trans.append([])

    return trans
sys.stderr.write(err) sys.exit(1) bleu = float(out) return bleu hyp_list = [] ref_list = [] start_time = time.time() for ix, batch in enumerate(test_iter, start=1): src_raw = batch[0] trg_raw = batch[1:] src, src_mask = convert_data(src_raw, src_vocab, device, True, UNK, PAD, SOS, EOS) with torch.no_grad(): output = model.beamsearch(src, src_mask, opt.beam_size, normalize=True) best_hyp, best_score = output[0] best_hyp = convert_str([best_hyp], trg_vocab) hyp_list.append(best_hyp[0]) ref = map(lambda x: x[0], trg_raw) ref_list.append(ref) print(ix, len(test_iter), 100. * ix / len(test_iter)) elapsed = time.time() - start_time bleu1 = corpus_bleu(ref_list, hyp_list, smoothing_function=SmoothingFunction().method1) hyp_list = map(lambda x: ' '.join(x), hyp_list) p_tmp = tempfile.mktemp() f_tmp = open(p_tmp, 'w') f_tmp.write('\n'.join(hyp_list)) f_tmp.close()
def _save_progress(filename, option, reader, count, totcost, best_score):
    """Store the current training progress in ``option`` and serialize it."""
    option["indices"] = reader.get_indices()
    option["bleu"] = best_score
    option["cost"] = totcost
    option["count"] = [count, reader.count]
    serialize(filename, option)


def train(args):
    """Train (or resume training of) an ``rnnsearch`` model.

    Options come from ``default_option()``, are replaced by the ones stored
    in ``args.model`` when that file exists, and are then overridden from the
    command line. Progress is written to ``<model>.autosave.pkl``; the state
    with the best validation BLEU is written to ``<model>.best.pkl``.

    Args:
        args: Parsed command-line namespace; reads ``model``, ``initialize``,
            ``finetune``, ``reset`` and ``gpuid`` (plus whatever ``override``
            consumes).
    """
    option = default_option()

    # predefined model names
    pathname, basename = os.path.split(args.model)
    modelname = get_filename(basename)
    autoname = os.path.join(pathname, modelname + ".autosave.pkl")
    bestname = os.path.join(pathname, modelname + ".best.pkl")

    # load models
    if os.path.exists(args.model):
        option, params = load_model(args.model)
        init = False
    else:
        init = True

    if args.initialize:
        # only the second element of the loaded pair (the values) is used
        init_params = load_model(args.initialize)[1]
        restore = True
    else:
        restore = False

    override(option, args)
    print_option(option)

    # load references
    if option["references"]:
        references = load_references(option["references"])
    else:
        references = None

    # input corpus
    batch = option["batch"]
    sortk = option["sort"] or 1
    shuffle = option["seed"] if option["shuffle"] else None
    reader = textreader(option["corpus"], shuffle)
    processor = [data_length, data_length]
    stream = textiterator(reader, [batch, batch * sortk], processor,
                          option["limit"], option["sort"])

    if shuffle and option["indices"] is not None:
        reader.set_indices(option["indices"])

    if args.reset:
        option["count"] = [0, 0]
        option["epoch"] = 0
        option["cost"] = 0.0

    # fast-forward the reader to the position stored in the options
    skip_stream(reader, option["count"][1])
    epoch = option["epoch"]
    maxepoch = option["maxepoch"]

    # create session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    if args.gpuid >= 0:
        config.gpu_options.visible_device_list = "%d" % args.gpuid

    with tf.Session(config=config):
        # set seed
        np.random.seed(option["seed"])
        tf.set_random_seed(option["seed"])

        # create model
        initializer = tf.random_uniform_initializer(-0.08, 0.08)
        model = rnnsearch(initializer=initializer, **option)

        # Single-argument print(...) produces the same text under Python 2
        # and also parses under Python 3.
        print("parameters: %s"
              % (count_parameters(tf.trainable_variables()),))

        variables = None

        if restore:
            matched, not_matched = match_variables(tf.trainable_variables(),
                                                   init_params)
            if args.finetune:
                # when fine-tuning, only the unmatched variables are handed
                # to the optimizer
                variables = not_matched

        # create optimizer
        constraint = ["norm", option["norm"]]
        optim = optimizer(model, algorithm=option["optimizer"], norm=True,
                          constraint=constraint, variables=variables)

        tf.global_variables_initializer().run()

        if not init:
            set_variables(tf.trainable_variables(), params)

        if restore:
            restore_variables(matched, not_matched)

        # beamsearch option
        search_opt = {
            "beamsize": option["beamsize"],
            "normalize": option["normalize"],
            "maxlen": option["maxlen"],
            "minlen": option["minlen"],
        }

        # vocabulary and special symbol
        svocabs, tvocabs = option["vocabulary"]
        svocab, isvocab = svocabs
        tvocab, itvocab = tvocabs
        unk_sym = option["unk"]
        eos_sym = option["eos"]

        # summary
        count = option["count"][0]
        totcost = option["cost"]
        best_score = option["bleu"]
        alpha = option["alpha"]

        for i in range(epoch, maxepoch):
            for data in stream:
                xdata, xlen = convert_data(data[0], svocab, unk_sym, eos_sym)
                ydata, ylen = convert_data(data[1], tvocab, unk_sym, eos_sym)

                t1 = time.time()
                cost, norm = optim.optimize(xdata, xlen, ydata, ylen)
                optim.update(alpha=alpha)
                t2 = time.time()

                count += 1

                # Rescale by (number of sentences / total target length);
                # presumably turns a per-sentence cost into a per-token one -
                # TODO confirm. Dividing by ln 2 accumulates it in bits.
                cost = cost * len(ylen) / sum(ylen)
                totcost += cost / math.log(2)
                print("%s %s %s %s %s" % (i + 1, count, cost, norm, t2 - t1))

                # save model
                if count % option["freq"] == 0:
                    _save_progress(autoname, option, reader, count, totcost,
                                   best_score)

                # The epoch-end check below treats a falsy vfreq as
                # "validation disabled", so guard the modulo the same way.
                if option["vfreq"] and count % option["vfreq"] == 0:
                    if option["validation"] and references:
                        trans = translate(model, option["validation"],
                                          **search_opt)
                        bleu_score = bleu(trans, references)
                        print("bleu: %2.4f" % bleu_score)
                        if bleu_score > best_score:
                            best_score = bleu_score
                            _save_progress(bestname, option, reader, count,
                                           totcost, best_score)

                if count % option["sfreq"] == 0:
                    # decode one random sentence of the current batch
                    batch = len(data[0])
                    ind = np.random.randint(0, batch)
                    sdata = data[0][ind]
                    tdata = data[1][ind]
                    xdata = xdata[:, ind:ind + 1]
                    xlen = xlen[ind:ind + 1]
                    hls = beamsearch(model, xdata, xlen, **search_opt)
                    best, score = hls[0]
                    print(sdata)
                    print(tdata)
                    print("search score: %s" % (score,))
                    print("translation: " + " ".join(best[:-1]))

            print("--------------------------------------------------")

            # Validate at the end of the epoch under the same preconditions
            # as the in-loop validation (a validation set must be set).
            if option["vfreq"] and option["validation"] and references:
                trans = translate(model, option["validation"], **search_opt)
                bleu_score = bleu(trans, references)
                print("iter: %d, bleu: %2.4f" % (i + 1, bleu_score))
                if bleu_score > best_score:
                    best_score = bleu_score
                    _save_progress(bestname, option, reader, count, totcost,
                                   best_score)

            # Average over the updates actually counted this epoch.
            # option["count"][0] is only refreshed on saves and is reset to 0
            # at every epoch end, so dividing by it could report a wrong
            # average or raise ZeroDivisionError.
            averaged = totcost / count if count else float("nan")
            print("averaged cost:  %s" % (averaged,))
            print("--------------------------------------------------")

            # From epoch index option["stop"] onward, scale alpha (the value
            # passed to optim.update) by option["decay"].
            if i >= option["stop"]:
                alpha = alpha * option["decay"]

            count = 0
            totcost = 0.0
            stream.reset()

            # update autosave
            option["epoch"] = i + 1
            option["alpha"] = alpha
            option["indices"] = reader.get_indices()
            option["bleu"] = best_score
            option["cost"] = totcost
            option["count"] = [0, 0]
            serialize(autoname, option)

        print("best(bleu): %2.4f" % best_score)

    stream.close()
def train(epoch):
    """Run one training epoch over ``train_iter``.

    Each batch is sorted, converted to tensors and passed through the model
    (via ``nn.parallel.data_parallel`` when several GPUs are visible and
    ``opt.local_rank`` is None), followed by gradient clipping and an
    optimizer step. Periodically the function also:

    * validates every ``opt.vfreq`` batches, then optionally adjusts the
      learning rate and keeps the overall-best and per-epoch-best
      checkpoints, deleting the file each one supersedes;
    * decodes and logs one random training sentence every ``opt.sfreq``
      batches;
    * saves a rolling "tmp" checkpoint every ``opt.freq`` batches.

    Args:
        epoch: Index of the current epoch; used for logging, the summary
            step and checkpoint naming.
    """
    model.train()
    opt.epoch_best_score = -float("inf")
    opt.epoch_best_name = None

    n_batches = len(train_iter)
    for batch_idx, batch in enumerate(train_iter, start=1):
        batch = sort_batch(batch)
        src_raw = batch[0]
        trg_raw = batch[1]
        src, src_mask = convert_data(
            src_raw, src_vocab, device, True, UNK, PAD, SOS, EOS
        )
        # The target is converted twice, differing only in the boolean flag.
        # NOTE(review): the f_/b_ prefixes suggest forward/backward target
        # orderings - confirm against convert_data.
        f_trg, f_trg_mask = convert_data(
            trg_raw, trg_vocab, device, False, UNK, PAD, SOS, EOS
        )
        b_trg, b_trg_mask = convert_data(
            trg_raw, trg_vocab, device, True, UNK, PAD, SOS, EOS
        )

        optimizer.zero_grad()
        if opt.cuda and torch.cuda.device_count() > 1 and opt.local_rank is None:
            loss, w_loss = nn.parallel.data_parallel(
                model, (src, src_mask, f_trg, f_trg_mask, b_trg, b_trg_mask), device_ids
            )
        else:
            loss, w_loss = model(src, src_mask, f_trg, f_trg_mask, b_trg, b_trg_mask)

        # Reduce before .item(): the backward pass below uses loss.mean(),
        # so the loss may hold several elements (e.g. one per GPU), and
        # Tensor.item() only accepts single-element tensors.
        loss_value = loss.mean().item()

        # The original read an undefined ``current_batches`` here; the index
        # of the current batch is what advances the global step.
        global_batches = n_batches * epoch + batch_idx
        writer.add_scalar(
            "./loss",
            scalar_value=loss_value,
            global_step=global_batches,
        )

        loss.mean().backward()
        torch.nn.utils.clip_grad_norm_(param_list, opt.grad_clip)
        optimizer.step()

        # Log the first batch, every 10th batch and the last batch.
        # (batch_idx starts at 1, so the former ``== 0`` test never fired.)
        if batch_idx % 10 == 0 or batch_idx == n_batches or batch_idx == 1:
            logger.info(
                "Epoch: {} batch: {}/{}({:.3%}), loss: {:.6}, lr: {}".format(
                    epoch,
                    batch_idx,
                    n_batches,
                    batch_idx / n_batches,
                    loss_value,
                    opt.cur_lr,
                )
            )

        # validation
        if batch_idx % opt.vfreq == 0:
            logger.info("===========validation / test START===========")
            evaluate(batch_idx, epoch)
            model.train()  # restore training mode after evaluation
            if opt.decay_lr:
                adjust_learningrate(opt.score_list)

            latest_score = opt.score_list[-1][0]
            if len(opt.score_list) == 1 or latest_score > max(
                x[0] for x in opt.score_list[:-1]
            ):
                # new overall best: replace the previous "best" checkpoint
                if opt.best_name is not None:
                    os.remove(os.path.join(opt.checkpoint, opt.best_name))
                opt.best_name = save_model(model, batch_idx, epoch, "best")

            if opt.epoch_best and latest_score > opt.epoch_best_score:
                # new best within this epoch
                opt.epoch_best_score = latest_score
                if opt.epoch_best_name is not None:
                    os.remove(os.path.join(opt.checkpoint, opt.epoch_best_name))
                opt.epoch_best_name = save_model(model, batch_idx, epoch, "epoch-best")
            logger.info("===========validation / test DONE===========")

        # sampling
        if batch_idx % opt.sfreq == 0:
            length = len(src_raw)
            ix = np.random.randint(0, length)
            samp_src_raw = [src_raw[ix]]
            samp_trg_raw = [trg_raw[ix]]
            samp_src, samp_src_mask = convert_data(
                samp_src_raw, src_vocab, device, True, UNK, PAD, SOS, EOS
            )
            model.eval()
            with torch.no_grad():
                output = model.beamsearch(samp_src, samp_src_mask, opt.beam_size)
            best_hyp, best_score = output[0]
            best_hyp = convert_str([best_hyp], trg_vocab)

            sampling_result = [
                ["Key", "Value"],
                ["Source", str(" ".join(samp_src_raw[0]))],
                ["Target", str(" ".join(samp_trg_raw[0]))],
                ["Predict", str(" ".join(best_hyp[0]))],
                ["Best Score", str(round(best_score, 5))],
            ]
            sampling_table = AsciiTable(sampling_result)
            logger.info("===========sampling START===========")
            logger.info("\n" + str(sampling_table.table))
            logger.info("===========sampling DONE===========")
            model.train()

        # saving model
        if opt.freq and batch_idx % opt.freq == 0:
            # keep a single rolling "tmp" checkpoint
            if opt.tmp_name is not None:
                os.remove(os.path.join(opt.checkpoint, opt.tmp_name))
            opt.tmp_name = save_model(model, batch_idx, epoch, "tmp")
def decode(args):
    """Translate ``args.corpus`` line by line into ``args.translation``.

    Loads the model named by ``args.model``, builds the ``NMT`` graph from
    the stored options, and runs beam search on every input line. Exactly one
    output line is written per input line, so the translation file stays
    aligned with the corpus. A running line counter, the hypothesis score and
    the search time are written to stderr.

    Args:
        args: Parsed command-line namespace; reads ``model``, ``corpus``,
            ``translation``, ``maxlen``, ``minlen``, ``beamsize`` and
            ``normalize``.
    """
    option, values = load_model(args.model)
    #option, values = load_average_model(args.model)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    svocabs, tvocabs = option["vocabulary"]
    svocab, isvocab = svocabs
    tvocab, itvocab = tvocabs
    unk_sym = option["unk"]
    eos_sym = option["eos"]
    source_word2vec, target_word2vec = option["word2vecs"]

    doption = {
        "maxlen": args.maxlen,
        "minlen": args.minlen,
        "beamsize": args.beamsize,
        "normalize": args.normalize
    }

    # create graph
    model = NMT(option["num_layers"], option["num_heads"],
                option["attention_dropout"], option["residual_dropout"],
                option["relu_dropout"], option["embedding"], option["hidden"],
                option["filter"], len(isvocab), len(itvocab),
                source_word2vec, target_word2vec)
    model.option = option

    # ``with`` closes both files even when decoding raises part-way through.
    with open(args.corpus, 'r') as input_file, \
            open(args.translation, 'w') as output_file:
        with tf.Session(config=config):
            tf.global_variables_initializer().run()
            set_variables(tf.trainable_variables(), values)

            for count, line in enumerate(input_file, start=1):
                seq, _, seq_len = convert_data([line], svocab, unk_sym,
                                               eos_sym)
                t1 = time.time()
                tlist = beamsearch(model, seq, seq_len, **doption)
                t2 = time.time()

                if len(tlist) == 0:
                    # No hypothesis: the blank line must go to the
                    # translation file (the original sent it to stdout),
                    # otherwise the output loses alignment with the input.
                    output_file.write("\n")
                    score = -10000.0
                else:
                    best, score = tlist[0]
                    # best[:-1] leaves out the final token of the hypothesis.
                    output_file.write(" ".join(best[:-1]))
                    output_file.write("\n")

                sys.stderr.write(str(count) + " ")
                sys.stderr.write(str(score) + " " + str(t2 - t1) + "\n")
def train(epoch):
    """Run one training epoch over ``train_iter``, printing per-batch stats.

    For every batch: sort it, convert it to tensors, run the model (through
    ``nn.parallel.data_parallel`` when several GPUs are visible and
    ``opt.local_rank`` is None), back-propagate the mean of the first model
    output, clip gradients and step the optimizer. Every ``opt.vfreq``
    batches the model is validated and best checkpoints are rotated; every
    ``opt.sfreq`` batches one random training sentence is decoded and
    printed; every ``opt.freq`` batches a rolling "tmp" checkpoint is saved.

    Args:
        epoch: Index of the current epoch; used in the printed stats and for
            checkpoint naming.
    """
    model.train()
    opt.epoch_best_score = -float('inf')
    opt.epoch_best_name = None

    for batch_idx, batch in enumerate(train_iter, start=1):
        tick = time.time()
        batch = sort_batch(batch)
        src_raw, trg_raw = batch[0], batch[1]

        src, src_mask = convert_data(src_raw, src_vocab, device, True,
                                     UNK, PAD, SOS, EOS)
        # the target is converted twice, differing only in the boolean flag
        f_trg, f_trg_mask = convert_data(trg_raw, trg_vocab, device, False,
                                         UNK, PAD, SOS, EOS)
        b_trg, b_trg_mask = convert_data(trg_raw, trg_vocab, device, True,
                                         UNK, PAD, SOS, EOS)

        optimizer.zero_grad()
        multi_gpu = (opt.cuda and torch.cuda.device_count() > 1
                     and opt.local_rank is None)
        if multi_gpu:
            outputs = nn.parallel.data_parallel(
                model,
                (src, src_mask, f_trg, f_trg_mask, b_trg, b_trg_mask),
                device_ids)
        else:
            outputs = model(src, src_mask, f_trg, f_trg_mask,
                            b_trg, b_trg_mask)

        # the first model output is the quantity that is optimised
        outputs[0].mean().backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(param_list, opt.grad_clip)
        optimizer.step()
        elapsed = time.time() - tick

        # every model output is reported as the string of its mean
        stats = ' '.join(str(term.mean().item()) for term in outputs)
        print(epoch, batch_idx, len(train_iter),
              100. * batch_idx / len(train_iter),
              stats, grad_norm.item(), opt.cur_lr, elapsed)

        # validation
        if batch_idx % opt.vfreq == 0:
            evaluate(batch_idx, epoch)
            model.train()  # restore training mode after evaluation
            if opt.decay_lr:
                adjust_learningrate(opt.score_list)

            is_new_best = (
                len(opt.score_list) == 1
                or opt.score_list[-1][0] > max(
                    entry[0] for entry in opt.score_list[:-1])
            )
            if is_new_best:
                # replace the previous overall-best checkpoint
                if opt.best_name is not None:
                    os.remove(os.path.join(opt.checkpoint, opt.best_name))
                opt.best_name = save_model(model, batch_idx, epoch, 'best')

            if opt.epoch_best and opt.score_list[-1][0] > opt.epoch_best_score:
                # replace the previous best checkpoint of this epoch
                opt.epoch_best_score = opt.score_list[-1][0]
                if opt.epoch_best_name is not None:
                    os.remove(os.path.join(opt.checkpoint,
                                           opt.epoch_best_name))
                opt.epoch_best_name = save_model(model, batch_idx, epoch,
                                                 'epoch-best')

        # sampling
        if batch_idx % opt.sfreq == 0:
            pick = np.random.randint(0, len(src_raw))
            samp_src_raw = [src_raw[pick]]
            samp_trg_raw = [trg_raw[pick]]
            samp_src, samp_src_mask = convert_data(samp_src_raw, src_vocab,
                                                   device, True,
                                                   UNK, PAD, SOS, EOS)
            model.eval()
            with torch.no_grad():
                output = model.beamsearch(samp_src, samp_src_mask,
                                          opt.beam_size)
            best_hyp, best_score = output[0]
            best_hyp = convert_str([best_hyp], trg_vocab)
            for tokens in (samp_src_raw[0], samp_trg_raw[0], best_hyp[0]):
                print('--', ' '.join(tokens))
            print('--', best_score)
            model.train()

        # saving model
        if opt.freq and batch_idx % opt.freq == 0:
            # keep a single rolling "tmp" checkpoint
            if opt.tmp_name is not None:
                os.remove(os.path.join(opt.checkpoint, opt.tmp_name))
            opt.tmp_name = save_model(model, batch_idx, epoch, 'tmp')