def train(train_data_path, test_data_path, args):
    """Train an end-to-end memory network on a single bAbI task.

    Reads the training and test files, builds the vocabulary on the fly,
    trains with a Chainer ``Trainer`` loop, and optionally saves the model.

    Args:
        train_data_path: Path to the bAbI training file.
        test_data_path: Path to the bAbI test file.
        args: Parsed command-line options (``device``, ``unit``, ``hop``,
            ``max_memory``, ``sentence_repr``, ``batchsize``, ``epoch``,
            ``model``).
    """
    device = chainer.get_device(args.device)
    device.use()

    # Word ids are handed out on first sight; id 0 is reserved for '<unk>'.
    vocab = collections.defaultdict(lambda: len(vocab))
    vocab['<unk>'] = 0

    raw_train = babi.read_data(vocab, train_data_path)
    raw_test = babi.read_data(vocab, test_data_path)
    print('Training data: %s: %d' % (train_data_path, len(raw_train)))
    print('Test data: %s: %d' % (test_data_path, len(raw_test)))

    # Pack the stories into fixed-size memory arrays.
    train_data = memnn.convert_data(raw_train, args.max_memory)
    test_data = memnn.convert_data(raw_test, args.max_memory)

    encoder = memnn.make_encoder(args.sentence_repr)
    network = memnn.MemNN(
        args.unit, len(vocab), encoder, args.max_memory, args.hop)
    model = chainer.links.Classifier(network, label_key='answer')
    optimizer = chainer.optimizers.Adam()
    model.to_device(device)
    optimizer.setup(model)

    train_iterator = chainer.iterators.SerialIterator(
        train_data, args.batchsize)
    test_iterator = chainer.iterators.SerialIterator(
        test_data, args.batchsize, repeat=False, shuffle=False)
    updater = chainer.training.StandardUpdater(
        train_iterator, optimizer, device=device)
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'))

    # Invoked every iteration so padding embeddings stay zeroed out.
    @chainer.training.make_extension()
    def fix_ignore_label(trainer):
        network.fix_ignore_label()

    trainer.extend(fix_ignore_label)
    trainer.extend(extensions.Evaluator(
        test_iterator, model, device=device))
    trainer.extend(extensions.LogReport())
    report_keys = [
        'epoch', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]
    trainer.extend(extensions.PrintReport(report_keys))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.run()

    if args.model:
        memnn.save_model(args.model, model, vocab)
def train(train_data_path, test_data_path, args):
    """Train an end-to-end memory network on a single bAbI task.

    Args:
        train_data_path: Path to the bAbI training file.
        test_data_path: Path to the bAbI test file.
        args: Parsed command-line options (``device``, ``unit``, ``hop``,
            ``max_memory``, ``sentence_repr``, ``batchsize``, ``epoch``,
            ``model``).
    """
    device = chainer.get_device(args.device)
    device.use()

    # Vocabulary assigns the next free id to each unseen word; id 0 is
    # reserved for unknown words.
    vocab = collections.defaultdict(lambda: len(vocab))
    vocab['<unk>'] = 0
    train_data = babi.read_data(vocab, train_data_path)
    test_data = babi.read_data(vocab, test_data_path)
    print('Training data: %s: %d' % (train_data_path, len(train_data)))
    print('Test data: %s: %d' % (test_data_path, len(test_data)))

    # Pack variable-length stories into fixed-size memory arrays.
    train_data = memnn.convert_data(train_data, args.max_memory)
    test_data = memnn.convert_data(test_data, args.max_memory)

    encoder = memnn.make_encoder(args.sentence_repr)
    network = memnn.MemNN(
        args.unit, len(vocab), encoder, args.max_memory, args.hop)
    model = chainer.links.Classifier(network, label_key='answer')
    opt = chainer.optimizers.Adam()
    model.to_device(device)
    opt.setup(model)

    train_iter = chainer.iterators.SerialIterator(
        train_data, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test_data, args.batchsize, repeat=False, shuffle=False)
    updater = chainer.training.StandardUpdater(train_iter, opt, device=device)
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'))

    # Runs every iteration; keeps the network's ignore-label handling in a
    # consistent state during training (see MemNN.fix_ignore_label).
    @chainer.training.make_extension()
    def fix_ignore_label(trainer):
        network.fix_ignore_label()

    trainer.extend(fix_ignore_label)
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.run()

    # Persist model + vocabulary only when an output path was given.
    if args.model:
        memnn.save_model(args.model, model, vocab)
def main():
    """Evaluate a trained MemNN on a bAbI test file and print accuracy."""
    parser = argparse.ArgumentParser(
        description='Chainer example: End-to-end memory networks')
    parser.add_argument('MODEL',
                        help='Path to model directory specified with `-m` '
                        'argument in the training script')
    parser.add_argument('DATA',
                        help='Path to test data in bAbI dataset '
                        '(e.g. "qa1_single-supporting-fact_test.txt")')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    model, vocab = memnn.load_model(args.MODEL)
    # Fix: --gpu was parsed but never used, so the script always ran on
    # CPU regardless of the flag. Honor it here.
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()
    xp = model.xp  # numpy on CPU, cupy on GPU

    network = model.predictor
    max_memory = network.max_memory
    id_to_vocab = {i: v for v, i in vocab.items()}

    test_data = babi.read_data(vocab, args.DATA)
    print('Test data: %s: %d' % (args.DATA, len(test_data)))

    # The longest sentence over all stories sets the memory row width.
    sentence_len = max(max(len(s.sentence) for s in story)
                       for story in test_data)
    correct = total = 0
    for story in test_data:
        mem = xp.zeros((max_memory, sentence_len), dtype=numpy.int32)
        i = 0
        for sent in story:
            if isinstance(sent, babi.Sentence):
                if i == max_memory:
                    # Memory is full: drop the oldest sentence by
                    # shifting the remaining rows up one slot.
                    mem[0:i - 1, :] = mem[1:i, :]
                    i -= 1
                mem[i, 0:len(sent.sentence)] = xp.asarray(sent.sentence)
                i += 1
            elif isinstance(sent, babi.Query):
                query = xp.array(sent.sentence, dtype=numpy.int32)
                # The network assumes mini-batch data; add a batch axis.
                score = network(mem[None], query[None])[0]
                # Fix: use .array (non-deprecated alias of .data, matching
                # the other scripts in this file) and cast to a Python int
                # so the dict lookup also works with GPU arrays.
                answer = int(xp.argmax(score.array))
                if answer == sent.answer:
                    correct += 1
                total += 1
                print(id_to_vocab[answer], id_to_vocab[sent.answer])
    accuracy = float(correct) / total
    print('Accuracy: %.2f%%' % (accuracy * 100))
def main():
    """Run a trained MemNN over a bAbI test set, printing every
    prediction next to the ground truth, then the final accuracy.
    """
    parser = argparse.ArgumentParser(
        description='Chainer example: End-to-end memory networks')
    parser.add_argument('MODEL',
                        help='Path to model directory specified with `-m` '
                        'argument in the training script')
    parser.add_argument('DATA',
                        help='Path to test data in bAbI dataset '
                        '(e.g. "qa1_single-supporting-fact_test.txt")')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    model, vocab = memnn.load_model(args.MODEL)
    # Bug fix: the --gpu option was accepted but silently ignored, so
    # evaluation always ran on CPU. Move the model (and, via model.xp,
    # all arrays built below) to the requested device.
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()
    xp = model.xp  # numpy on CPU, cupy on GPU

    network = model.predictor
    max_memory = network.max_memory
    # Reverse mapping: word id -> word, for human-readable output.
    id_to_vocab = {i: v for v, i in vocab.items()}

    test_data = babi.read_data(vocab, args.DATA)
    print('Test data: %s: %d' % (args.DATA, len(test_data)))

    # Width of one memory row = longest sentence in the whole test set.
    sentence_len = max(max(len(s.sentence) for s in story)
                       for story in test_data)
    correct = total = 0
    for story in test_data:
        mem = xp.zeros((max_memory, sentence_len), dtype=numpy.int32)
        i = 0
        for sent in story:
            if isinstance(sent, babi.Sentence):
                if i == max_memory:
                    # Sliding window over the story: evict the oldest
                    # sentence once the memory is full.
                    mem[0:i - 1, :] = mem[1:i, :]
                    i -= 1
                mem[i, 0:len(sent.sentence)] = xp.asarray(sent.sentence)
                i += 1
            elif isinstance(sent, babi.Query):
                query = xp.array(sent.sentence, dtype=numpy.int32)
                # networks assumes mini-batch data, hence the [None] axis
                score = network(mem[None], query[None])[0]
                # int(...) keeps the dict lookup valid for GPU arrays too.
                answer = int(xp.argmax(score.array))
                if answer == sent.answer:
                    correct += 1
                total += 1
                print(id_to_vocab[answer], id_to_vocab[sent.answer])
    accuracy = float(correct) / total
    print('Accuracy: %.2f%%' % (accuracy * 100))
def main():
    """Evaluate a trained MemNN on a bAbI test file on the chosen device.

    Loads the model saved by the training script, replays each story into
    a fixed-size memory, answers every query, prints prediction vs. ground
    truth, and finally prints the overall accuracy.
    """
    parser = argparse.ArgumentParser(
        description='Chainer example: End-to-end memory networks')
    parser.add_argument('MODEL',
                        help='Path to model directory specified with `-m` '
                        'argument in the training script')
    parser.add_argument('DATA',
                        help='Path to test data in bAbI dataset '
                        '(e.g. "qa1_single-supporting-fact_test.txt")')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    # --gpu is kept for backward compatibility; it writes into the same
    # `device` destination as --device.
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', dest='device',
                       type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = chainer.get_device(args.device)
    xp = device.xp  # numpy or cupy, matching the selected device
    device.use()

    model, vocab = memnn.load_model(args.MODEL)
    model.to_device(device)
    network = model.predictor
    max_memory = network.max_memory
    # Reverse mapping: word id -> word, for human-readable output.
    id_to_vocab = {i: v for v, i in vocab.items()}

    test_data = babi.read_data(vocab, args.DATA)
    print('Test data: %s: %d' % (args.DATA, len(test_data)))

    # Width of one memory row = longest sentence in the whole test set.
    sentence_len = max(max(len(s.sentence) for s in story)
                       for story in test_data)
    correct = total = 0
    for story in test_data:
        mem = xp.zeros((max_memory, sentence_len), dtype=numpy.int32)
        i = 0
        for sent in story:
            if isinstance(sent, babi.Sentence):
                if i == max_memory:
                    # Memory is full: evict the oldest sentence by
                    # shifting the remaining rows up one slot.
                    mem[0:i - 1, :] = mem[1:i, :]
                    i -= 1
                mem[i, 0:len(sent.sentence)] = xp.asarray(sent.sentence)
                i += 1
            elif isinstance(sent, babi.Query):
                query = xp.array(sent.sentence, dtype=numpy.int32)
                # networks assumes mini-batch data, hence the [None] axis
                score = network(mem[None], query[None])[0]
                answer = int(xp.argmax(score.array))
                if answer == sent.answer:
                    correct += 1
                total += 1
                print(id_to_vocab[answer], id_to_vocab[sent.answer])
    accuracy = float(correct) / total
    print('Accuracy: %.2f%%' % (accuracy * 100))
def main():
    """Train an independent MemNN on each of the 20 bAbI tasks.

    Older all-in-one version of the training script: parses options,
    then for every task id 1..20 reads the data, builds a fresh model,
    and runs a full Trainer loop. The vocabulary is shared (and keeps
    growing) across tasks.
    """
    parser = argparse.ArgumentParser(
        description='Chainer example: End-to-end memory networks')
    parser.add_argument('data', help='Path to bAbI dataset')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini batch')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--unit', '-u', type=int, default=20,
                        help='Number of units')
    parser.add_argument('--hop', '-H', type=int, default=3,
                        help='Number of hops')
    parser.add_argument('--max-memory', type=int, default=50,
                        help='Maximum number of memory')
    parser.add_argument('--sentence-repr',
                        choices=['bow', 'pe'], default='bow',
                        help='Sentence representation. '
                        'Select from BoW ("bow") or position encoding ("pe")')
    args = parser.parse_args()

    # Word ids are assigned on first sight; id 0 is reserved for '<unk>'.
    vocab = collections.defaultdict(lambda: len(vocab))
    vocab['<unk>'] = 0

    # NOTE(review): original formatting was lost; the loop is reconstructed
    # to cover one full training run per task — confirm against history.
    for data_id in six.moves.range(1, 21):
        train_data = babi.read_data(
            vocab,
            glob.glob('%s/qa%d_*train.txt' % (args.data, data_id))[0])
        test_data = babi.read_data(
            vocab,
            glob.glob('%s/qa%d_*test.txt' % (args.data, data_id))[0])
        print('Training data: %d' % len(train_data))

        train_data = convert_data(train_data, args.max_memory)
        test_data = convert_data(test_data, args.max_memory)

        if args.sentence_repr == 'bow':
            encoder = BoWEncoder()
        elif args.sentence_repr == 'pe':
            encoder = PositionEncoder()
        else:
            # Unreachable in practice: argparse `choices` already rejects
            # other values. NOTE(review): "Unknonw" typo in this runtime
            # message is left untouched (doc-only pass).
            print('Unknonw --sentence-repr option: "%s"' % args.sentence_repr)
            sys.exit(1)

        memnn = MemNN(args.unit, len(vocab), encoder,
                      args.max_memory, args.hop)
        model = L.Classifier(memnn, label_key='answer')
        opt = optimizers.Adam()

        if args.gpu >= 0:
            chainer.cuda.get_device(args.gpu).use()
            model.to_gpu()
        opt.setup(model)

        train_iter = chainer.iterators.SerialIterator(
            train_data, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(
            test_data, args.batchsize, repeat=False, shuffle=False)
        updater = training.StandardUpdater(train_iter, opt, device=args.gpu)
        trainer = training.Trainer(updater, (args.epoch, 'epoch'))

        # Runs every iteration; keeps the model's ignore-label handling
        # consistent during training (see MemNN.fix_ignore_label).
        @training.make_extension()
        def fix_ignore_label(trainer):
            memnn.fix_ignore_label()

        trainer.extend(fix_ignore_label)
        trainer.extend(extensions.Evaluator(test_iter, model,
                                            device=args.gpu))
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy']))
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.run()