def main():
    parser = argparse.ArgumentParser(
        description='Chainer example: convolutional seq2seq')
    parser.add_argument('--batchsize', '-b', type=int, default=48,
                        help='Number of sentence pairs in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--unit', '-u', type=int, default=512,
                        help='Number of units')
    parser.add_argument('--layer', '-l', type=int, default=6,
                        help='Number of layers')
    parser.add_argument('--head', type=int, default=8,
                        help='Number of heads in attention mechanism')
    parser.add_argument('--dropout', '-d', type=float, default=0.1,
                        help='Dropout rate')
    parser.add_argument('--input', '-i', type=str, default='./',
                        help='Input directory')
    parser.add_argument('--source', '-s', type=str,
                        default='europarl-v7.fr-en.en',
                        help='Filename of train data for source language')
    parser.add_argument('--target', '-t', type=str,
                        default='europarl-v7.fr-en.fr',
                        help='Filename of train data for target language')
    parser.add_argument('--source-valid', '-svalid', type=str,
                        default='dev/newstest2013.en',
                        help='Filename of validation data for source language')
    parser.add_argument('--target-valid', '-tvalid', type=str,
                        default='dev/newstest2013.fr',
                        help='Filename of validation data for target language')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--source-vocab', type=int, default=40000,
                        help='Vocabulary size of source language')
    parser.add_argument('--target-vocab', type=int, default=40000,
                        help='Vocabulary size of target language')
    parser.add_argument('--no-bleu', '-no-bleu', action='store_true',
                        help='Skip BLEU calculation')
    parser.add_argument('--use-label-smoothing', action='store_true',
                        help='Use label smoothing for cross entropy')
    parser.add_argument('--embed-position', action='store_true',
                        help='Use position embedding rather than sinusoid')
    parser.add_argument('--use-fixed-lr', action='store_true',
                        help='Use fixed learning rate rather than the '
                             'annealing proposed in the paper')
    args = parser.parse_args()
    print(json.dumps(args.__dict__, indent=4))

    # Check files and build datasets
    en_path = os.path.join(args.input, args.source)
    source_vocab = ['<eos>', '<unk>', '<bos>'] + \
        preprocess.count_words(en_path, args.source_vocab)
    source_data = preprocess.make_dataset(en_path, source_vocab)
    fr_path = os.path.join(args.input, args.target)
    target_vocab = ['<eos>', '<unk>', '<bos>'] + \
        preprocess.count_words(fr_path, args.target_vocab)
    target_data = preprocess.make_dataset(fr_path, target_vocab)
    assert len(source_data) == len(target_data)
    print('Original training data size: %d' % len(source_data))
    train_data = [(s, t)
                  for s, t in six.moves.zip(source_data, target_data)
                  if 0 < len(s) < 50 and 0 < len(t) < 50]
    print('Filtered training data size: %d' % len(train_data))

    en_path = os.path.join(args.input, args.source_valid)
    source_data = preprocess.make_dataset(en_path, source_vocab)
    fr_path = os.path.join(args.input, args.target_valid)
    target_data = preprocess.make_dataset(fr_path, target_vocab)
    assert len(source_data) == len(target_data)
    test_data = [(s, t)
                 for s, t in six.moves.zip(source_data, target_data)
                 if 0 < len(s) and 0 < len(t)]

    source_ids = {word: index for index, word in enumerate(source_vocab)}
    target_ids = {word: index for index, word in enumerate(target_vocab)}

    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    # Define Model
    model = net.Transformer(
        args.layer,
        min(len(source_ids), len(source_words)),
        min(len(target_ids), len(target_words)),
        args.unit,
        h=args.head,
        dropout=args.dropout,
        max_length=500,
        use_label_smoothing=args.use_label_smoothing,
        embed_position=args.embed_position)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)

    # Setup Optimizer
    optimizer = chainer.optimizers.Adam(
        alpha=5e-5, beta1=0.9, beta2=0.98, eps=1e-9)
    optimizer.setup(model)

    # Setup Trainer
    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test_data, args.batchsize,
                                                 repeat=False, shuffle=False)
    iter_per_epoch = len(train_data) // args.batchsize
    print('Number of iter/epoch =', iter_per_epoch)

    updater = training.StandardUpdater(
        train_iter, optimizer,
        converter=seq2seq_pad_concat_convert, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # If you want to change the logging interval, change this number
    log_trigger = (min(200, iter_per_epoch // 2), 'iteration')

    def floor_step(trigger):
        floored = trigger[0] - trigger[0] % log_trigger[0]
        if floored <= 0:
            floored = trigger[0]
        return (floored, trigger[1])

    # Validation every half epoch
    eval_trigger = floor_step((iter_per_epoch // 2, 'iteration'))
    record_trigger = training.triggers.MinValueTrigger(
        'val/main/perp', eval_trigger)

    evaluator = extensions.Evaluator(
        test_iter, model,
        converter=seq2seq_pad_concat_convert, device=args.gpu)
    evaluator.default_name = 'val'
    trainer.extend(evaluator, trigger=eval_trigger)

    # Use Vaswani's learning-rate rule (Eq. 3 in the paper).
    # However, the hyperparameters in the paper seem to work well
    # only with a large batch size.
    # If you run a common setup (e.g. batchsize=48 on 1 GPU),
    # you may have to change the hyperparameters.
    # I consistently scaled the learning rate by 0.5
    # ("scale" is always multiplied into the learning rate).
    # If you use a shallow network (<= 2 layers),
    # you may not have to change the paper's setting.
    if not args.use_fixed_lr:
        trainer.extend(
            # VaswaniRule('alpha', d=args.unit, warmup_steps=4000, scale=1.),
            # VaswaniRule('alpha', d=args.unit, warmup_steps=32000, scale=1.),
            # VaswaniRule('alpha', d=args.unit, warmup_steps=4000, scale=0.5),
            # VaswaniRule('alpha', d=args.unit, warmup_steps=16000, scale=1.),
            VaswaniRule('alpha', d=args.unit, warmup_steps=64000, scale=1.),
            trigger=(1, 'iteration'))
    observe_alpha = extensions.observe_value(
        'alpha',
        lambda trainer: trainer.updater.get_optimizer('main').alpha)
    trainer.extend(observe_alpha, trigger=(1, 'iteration'))

    # Save (overwrite) the model only when it gets the best validation score
    trainer.extend(extensions.snapshot_object(model, 'best_model.npz'),
                   trigger=record_trigger)

    def translate_one(source, target):
        words = preprocess.split_sentence(source)
        print('# source : ' + ' '.join(words))
        x = model.xp.array(
            [source_ids.get(w, 1) for w in words], 'i')
        ys = model.translate([x], beam=5)[0]
        words = [target_words[y] for y in ys]
        print('# result : ' + ' '.join(words))
        print('# expect : ' + target)

    @chainer.training.make_extension(trigger=(200, 'iteration'))
    def translate(trainer):
        translate_one('Who are we ?', 'Qui sommes-nous?')
        translate_one(
            'And it often costs over a hundred dollars '
            'to obtain the required identity card .',
            'Or, il en coûte souvent plus de cent dollars '
            'pour obtenir la carte d\'identité requise.')

        source, target = test_data[numpy.random.choice(len(test_data))]
        source = ' '.join([source_words[i] for i in source])
        target = ' '.join([target_words[i] for i in target])
        translate_one(source, target)

    # Generation test
    trainer.extend(translate,
                   trigger=(min(200, iter_per_epoch), 'iteration'))

    # Calculate BLEU every half epoch
    if not args.no_bleu:
        trainer.extend(
            CalculateBleu(model, test_data, 'val/main/bleu',
                          device=args.gpu, batch=args.batchsize // 4),
            trigger=floor_step((iter_per_epoch // 2, 'iteration')))

    # Log
    trainer.extend(extensions.LogReport(trigger=log_trigger),
                   trigger=log_trigger)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/loss', 'val/main/loss',
        'main/perp', 'val/main/perp',
        'main/acc', 'val/main/acc',
        'val/main/bleu',
        'alpha',
        'elapsed_time']),
        trigger=log_trigger)

    print('start training')
    trainer.run()
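# A minimal reference sketch (not part of the original script): VaswaniRule
# above presumably follows Eq. 3 of "Attention Is All You Need",
#     lrate = scale * d_model**-0.5 * min(step**-0.5, step * warmup**-1.5),
# which increases the rate linearly for `warmup_steps` iterations and then
# decays it proportionally to step**-0.5. The helper below only illustrates
# that schedule; its name and signature are assumptions, not the extension's
# actual API.
def vaswani_learning_rate(step, d_model=512, warmup_steps=64000, scale=1.0):
    step = max(step, 1)
    return scale * d_model ** -0.5 * min(step ** -0.5,
                                         step * warmup_steps ** -1.5)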
        # Loop over each filter height
        for i, filter_height in enumerate(self.filter_height_list):
            xcs[i] = F.relu(self[i](exs))
            chs[i] = F.max_pooling_2d(
                xcs[i], (self.max_sentence_size + 1 - filter_height))
        # Concatenate the convolution + pooling results
        h = F.concat(chs, axis=2)
        h = F.dropout(F.tanh(self[self.convolution_num + 0](h)))
        y = self[self.convolution_num + 1](h)
        return y


# Training
en_path = os.path.join("./", "train/body.txt")
source_vocab = ['<eos>', '<unk>', '<bos>'] + preprocess.count_words(
    en_path, 900)
source_data = preprocess.make_dataset(en_path, source_vocab)
source_ids = {word: index for index, word in enumerate(source_vocab)}
words = {i: w for w, i in source_ids.items()}
N = len(source_data)
words[len(words)] = "padding"

a = [0] * 1000
b = [1] * 1000
data_t = a + b

max_len = 0
for k in range(len(source_data)):
    if max_len < len(source_data[k]):
        max_len = len(source_data[k])
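# Side note (illustration only; shapes are assumed, not taken from the model
# above): the pooling window (max_sentence_size + 1 - filter_height) equals
# the number of rows a height-`filter_height` convolution produces over a
# sentence padded to `max_sentence_size` rows, so the max-pooling collapses
# each feature map to one value per filter (global max pooling over time).
def _pooling_window_demo(max_sentence_size=30, filter_height=3, n_filters=8):
    import numpy
    import chainer.functions as F
    conv_out = numpy.random.randn(
        1, n_filters, max_sentence_size + 1 - filter_height, 1).astype('f')
    pooled = F.max_pooling_2d(
        conv_out, (max_sentence_size + 1 - filter_height, 1))
    return pooled.shape  # (1, 8, 1, 1): one value per filter map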
def main():
    parser = argparse.ArgumentParser(
        description='Chainer example: Att_summary')
    parser.add_argument('--source', '-s', type=str,
                        default='test/body.txt',
                        help='source sentence list')
    parser.add_argument('--target', '-t', type=str,
                        default='test/sum.txt',
                        help='target sentence list')
    parser.add_argument('--source_valid', type=str,
                        default='test/body.txt',
                        help='source sentence list for validation')
    parser.add_argument('--target_valid', type=str,
                        default='test/sum.txt',
                        help='target sentence list for validation')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='number of sentence pairs in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=200,
                        help='number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=200,
                        help='number of units')
    parser.add_argument('--layer', '-l', type=int, default=1,
                        help='number of layers')
    parser.add_argument('--log_interval', type=int, default=20,
                        help='number of iterations between log outputs')
    parser.add_argument('--validation_interval', type=int, default=20,
                        help='number of iterations between evaluations of '
                             'the model on the validation dataset')
    parser.add_argument('--out', '-o', default='result',
                        help='directory to output the result')
    args = parser.parse_args()

    # Load pre-processed dataset
    print('[{}] Loading dataset... (this may take several minutes)'.format(
        datetime.datetime.now()))
    en_path = os.path.join("./", args.source)
    # The second argument is the vocabulary size
    source_vocab = ['<eos>', '<unk>', '<bos>'] + preprocess.count_words(
        en_path, 18000)
    source_data = preprocess.make_dataset(en_path, source_vocab)
    fr_path = os.path.join("./", args.target)
    target_vocab = ['<eos>', '<unk>', '<bos>'] + preprocess.count_words(
        fr_path, 18000)
    target_data = preprocess.make_dataset(fr_path, target_vocab)
    assert len(source_data) == len(target_data)
    print('Original training data size: %d' % len(source_data))
    train_data = [(s, t) for s, t in six.moves.zip(source_data, target_data)]
    print('Filtered training data size: %d' % len(train_data))

    source_ids = {word: index for index, word in enumerate(source_vocab)}
    target_ids = {word: index for index, word in enumerate(target_vocab)}
    # Invert the {word: index} mappings
    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    # Setup model
    stack = 1
    width = 3
    model = Att(args.layer, len(source_ids), len(target_ids), args.unit,
                args.batchsize, WEIGHT, stack, width)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)

    # Setup optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Setup iterator
    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)

    # Setup updater and trainer
    updater = training.StandardUpdater(
        train_iter, optimizer,
        converter=seq2seq_pad_concat_convert, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/loss', 'validation/main/loss',
        'elapsed_time']),
        trigger=(1, 'epoch'))

    if args.source_valid and args.target_valid:
        en_path = os.path.join("./", args.source_valid)
        source_data = preprocess.make_dataset(en_path, source_vocab)
        fr_path = os.path.join("./", args.target_valid)
        target_data = preprocess.make_dataset(fr_path, target_vocab)
        assert len(source_data) == len(target_data)
        test_data = [(s, t)
                     for s, t in six.moves.zip(source_data, target_data)]
        test_source_unknown = calculate_unknown_ratio(
            [s for s, _ in test_data])
        test_target_unknown = calculate_unknown_ratio(
            [t for _, t in test_data])
        print('Validation data: %d' % len(test_data))
        print('Validation source unknown ratio: %.2f%%' %
              (test_source_unknown * 100))
        print('Validation target unknown ratio: %.2f%%' %
              (test_target_unknown * 100))

        @chainer.training.make_extension()
        def translate_one(trainer):
            # Generate from a training sentence (index 21; the default is 0)
            a, b = map(list, zip(*train_data))
            source = a[21]
            target = b[21]
            result = model.translate(source)[0]
            """
            # Generate from a random test sentence instead:
            source, target = test_data[numpy.random.choice(len(test_data))]
            result = model.translate([model.xp.array(source)])[0]
            """
            source_sentence = ' '.join([source_words[x] for x in source])
            target_sentence = ' '.join([target_words[y] for y in target])
            result_sentence = ' '.join([target_words[y] for y in result])
            print('# source : ' + source_sentence)
            print('# result : ' + result_sentence)
            print('# expect : ' + target_sentence)
            if WEIGHT:
                with open("weight/wei.txt", "a", encoding="utf-8") as f:
                    f.write("<body> <fos> " + str(source_sentence) + "\n")
                    f.write("<generation> <fos> " + str(result_sentence) + "\n")

        def translate_all(trainer):
            a, b = map(list, zip(*test_data))
            for k in range(len(test_data)):
                source = a[k]
                result = model.translate(source)[0]
                result_sentence = ' '.join([target_words[y] for y in result])
                with open("summary/result.txt", "a", encoding="utf-8") as f:
                    f.write(str(result_sentence) + "\n")
            sys.exit(0)

        """
        if TRANS_ALL:
            trainer.extend(translate_all, trigger=(19, 'epoch'))
        """
        trainer.extend(translate_one,
                       trigger=(args.validation_interval, 'epoch'))
        if TRANS_ALL:
            trainer.extend(translate_all, trigger=(20, 'epoch'))

        test_iter = chainer.iterators.SerialIterator(
            test_data, args.batchsize, False, False)
        trainer.extend(
            extensions.Evaluator(
                test_iter, model,
                converter=seq2seq_pad_concat_convert, device=args.gpu))

    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    # trainer.extend(extensions.PlotReport(
    #     ['main/loss', 'validation/main/loss'],
    #     x_key='epoch', file_name='loss.png'))

    print('start training')
    """
    # Load a saved snapshot to resume training:
    filename = "./result/snapshot_iter_779"
    serializers.load_npz(filename, trainer)
    """
    trainer.run()
def main():
    args = get_args()
    app = Flask(__name__)

    source_vocab = ['<eos>', '<unk>', '<bos>'] + \
        preprocess.count_words(args.source, args.source_vocab)
    source_data = preprocess.make_dataset(args.source, source_vocab)
    target_vocab = ['<eos>', '<unk>', '<bos>'] + \
        preprocess.count_words(args.target, args.target_vocab)
    target_data = preprocess.make_dataset(args.target, target_vocab)

    source_ids = {word: index for index, word in enumerate(source_vocab)}
    target_ids = {word: index for index, word in enumerate(target_vocab)}
    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    # Run in inference mode (no dropout, no backprop graph). Calling
    # using_config / no_backprop_mode without a `with` block has no effect,
    # so set the global config flags directly.
    chainer.config.train = False
    chainer.config.enable_backprop = False

    model = net.Transformer(
        args.layer,
        min(len(source_ids), len(source_words)),
        min(len(target_ids), len(target_words)),
        args.unit,
        h=args.head,
        dropout=args.dropout,
        max_length=500,
        use_label_smoothing=args.use_label_smoothing,
        embed_position=args.embed_position)
    chainer.serializers.load_npz(args.model, model)

    m = MeCab('-Owakati')

    def translate_one(source):
        words = preprocess.split_sentence(source)
        # print('# source : ' + ' '.join(words))
        x = model.xp.array(
            [source_ids.get(w, 1) for w in words], 'i')
        ys = model.translate([x], beam=5)[0]
        words = [target_words[y] for y in ys]
        return ''.join(words)

    @app.route('/', methods=['GET', 'POST'])
    def post():
        title = '日経記事要約 by transformer'
        if request.method == 'GET':
            message = '日経に関連する記事を入力してください'
            return render_template('index.html',
                                   message=message, title=title)
        elif request.method == 'POST':
            body = request.form['body']
            body = m.parse(normalize(body))
            abst = translate_one(body)
            return render_template('index.html',
                                   body=body, title=title, abst=abst)
        else:
            return redirect(url_for('index'))

    app.debug = True
    app.run(host='localhost')
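# Example request against the app above, assuming it is running locally on
# Flask's default port 5000; the form field name 'body' comes from the route,
# and the article text is a placeholder:
#
#   curl -X POST --data-urlencode 'body=<article text>' http://localhost:5000/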
def main():
    parser = argparse.ArgumentParser(
        description='Chainer example: convolutional seq2seq')
    parser.add_argument('--batchsize', '-b', type=int, default=48,
                        help='Number of sentence pairs in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--unit', '-u', type=int, default=512,
                        help='Number of units')
    parser.add_argument('--layer', '-l', type=int, default=6,
                        help='Number of layers')
    parser.add_argument('--head', type=int, default=8,
                        help='Number of heads in attention mechanism')
    parser.add_argument('--dropout', '-d', type=float, default=0.1,
                        help='Dropout rate')
    parser.add_argument('--model', type=str,
                        help='Trained model (npz snapshot)')
    parser.add_argument('--input', '-i', type=str, default='./',
                        help='Input directory')
    parser.add_argument('--source', '-s', type=str,
                        default='europarl-v7.fr-en.en',
                        help='Filename of train data for source language')
    parser.add_argument('--target', '-t', type=str,
                        default='europarl-v7.fr-en.fr',
                        help='Filename of train data for target language')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--source-vocab', type=int, default=40000,
                        help='Vocabulary size of source language')
    parser.add_argument('--target-vocab', type=int, default=40000,
                        help='Vocabulary size of target language')
    parser.add_argument('--no-bleu', '-no-bleu', action='store_true',
                        help='Skip BLEU calculation')
    parser.add_argument('--use-label-smoothing', action='store_true',
                        help='Use label smoothing for cross entropy')
    parser.add_argument('--embed-position', action='store_true',
                        help='Use position embedding rather than sinusoid')
    parser.add_argument('--use-fixed-lr', action='store_true',
                        help='Use fixed learning rate rather than the '
                             'annealing proposed in the paper')
    parser.add_argument('--disable-mecab', '--dm', action='store_true',
                        help='Disable MeCab tokenization')
    args = parser.parse_args()
    print(json.dumps(args.__dict__, indent=4))

    # Check files and rebuild the vocabularies used at training time
    en_path = os.path.join(args.input, args.source)
    source_vocab = ['<eos>', '<unk>', '<bos>'] + \
        preprocess.count_words(en_path, args.source_vocab)
    source_data = preprocess.make_dataset(en_path, source_vocab)
    fr_path = os.path.join(args.input, args.target)
    target_vocab = ['<eos>', '<unk>', '<bos>'] + \
        preprocess.count_words(fr_path, args.target_vocab)
    # print('Original training data size: %d' % len(source_data))
    # print('Filtered training data size: %d' % len(train_data))

    source_ids = {word: index for index, word in enumerate(source_vocab)}
    target_ids = {word: index for index, word in enumerate(target_vocab)}
    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    m = MeCab('-Owakati')

    # Define Model
    model = net.Transformer(
        args.layer,
        min(len(source_ids), len(source_words)),
        min(len(target_ids), len(target_words)),
        args.unit,
        h=args.head,
        dropout=args.dropout,
        max_length=500,
        use_label_smoothing=args.use_label_smoothing,
        embed_position=args.embed_position)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)
    chainer.serializers.load_npz(args.model, model)

    def translate_one(source, target):
        words = preprocess.split_sentence(source)
        print('# source : ' + ' '.join(words))
        x = model.xp.array([source_ids.get(w, 1) for w in words], 'i')
        ys = model.translate([x], beam=5)[0]
        words = [target_words[y] for y in ys]
        print('# result : ' + ' '.join(words))
        print('# expect : ' + target)

    def tokenize(source, target):
        if args.disable_mecab:
            return source, target
        return m.parse(source), m.parse(target)

    while True:
        source = input('source> ')
        target = input('target> ')
        source, target = tokenize(source, target)
        translate_one(source, target)
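# Example invocation (the script filename here is assumed; 'result/best_model.npz'
# is where the training script above writes its best snapshot, and --source /
# --target must point at the same data used to build the training vocabulary):
#
#   python translate_interactive.py --model result/best_model.npz \
#       --input ./ --source europarl-v7.fr-en.en --target europarl-v7.fr-en.fr
#
# The loop then reads 'source> ' and 'target> ' lines from stdin, tokenizes
# them with MeCab unless --disable-mecab is given, and prints the beam-search
# result next to the expected translation.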