def main(config):
    loaders = DataLoader(
        train_fn=config.train_fn,
        batch_size=config.batch_size,
        min_freq=config.min_vocab_freq,
        max_vocab=config.max_vocab_size,
        device=config.gpu_id
    )

    print(
        '|train| =', len(loaders.train_loader.dataset),
        '|valid| =', len(loaders.valid_loader.dataset),
    )

    vocab_size = len(loaders.text.vocab)
    n_classes = len(loaders.label.vocab)
    print('|vocab| =', vocab_size, ' |classes| =', n_classes)

    if config.rnn is False and config.cnn is False:
        raise Exception('You need to specify an architecture to train. (--rnn or --cnn)')

    if config.rnn:
        # Declare model and loss.
        model = RNNClassifier(
            input_size=vocab_size,
            word_vec_size=config.word_vec_size,
            hidden_size=config.hidden_size,
            n_classes=n_classes,
            n_layers=config.n_layers,
            dropout_p=config.dropout,
        )
        optimizer = optim.Adam(model.parameters())
        crit = nn.NLLLoss()
        print(model)

        if config.gpu_id >= 0:
            model.cuda(config.gpu_id)
            crit.cuda(config.gpu_id)

        rnn_trainer = Trainer(config)
        rnn_model = rnn_trainer.train(
            model, crit, optimizer,
            loaders.train_loader, loaders.valid_loader
        )

    # Save everything needed for inference in one checkpoint; cnn_model comes
    # from the analogous CNN branch, which is not shown in this excerpt.
    torch.save({
        'rnn': rnn_model.state_dict() if config.rnn else None,
        'cnn': cnn_model.state_dict() if config.cnn else None,
        'config': config,
        'vocab': loaders.text.vocab,
        'classes': loaders.label.vocab,
    }, config.model_fn)
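The config object passed into main() is typically built with argparse. Below is a minimal sketch of such a define_argparser, covering only the options this main() reads; the default values are illustrative assumptions, not taken from the original script.

import argparse

def define_argparser():
    # Sketch: only the options main() above actually uses; defaults are illustrative.
    p = argparse.ArgumentParser()
    p.add_argument('--model_fn', required=True, help='path to save the trained model')
    p.add_argument('--train_fn', required=True, help='training corpus file')
    p.add_argument('--gpu_id', type=int, default=-1, help='-1 means CPU')
    p.add_argument('--batch_size', type=int, default=256)
    p.add_argument('--min_vocab_freq', type=int, default=5)
    p.add_argument('--max_vocab_size', type=int, default=999999)
    p.add_argument('--word_vec_size', type=int, default=256)
    p.add_argument('--hidden_size', type=int, default=512)
    p.add_argument('--n_layers', type=int, default=4)
    p.add_argument('--dropout', type=float, default=.3)
    p.add_argument('--rnn', action='store_true', help='train the RNN classifier')
    p.add_argument('--cnn', action='store_true', help='train the CNN classifier')
    return p.parse_args()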
def main(config, model_weight=None, opt_weight=None):
    def print_config(config):
        pp = pprint.PrettyPrinter(indent=4)
        pp.pprint(vars(config))
    print_config(config)

    loader = DataLoader(
        config.train,
        config.valid,
        (config.lang[:2], config.lang[-2:]),
        batch_size=config.batch_size,
        device=-1,
        max_length=config.max_length
    )

    input_size, output_size = len(loader.src.vocab), len(loader.tgt.vocab)
    model = get_model(input_size, output_size, config)
    crit = get_crit(output_size, data_loader.PAD)

    if model_weight:
        model.load_state_dict(model_weight)

    if config.gpu_id >= 0:
        model.cuda(config.gpu_id)
        crit.cuda(config.gpu_id)

    optimizer = get_optimizer(model, config)

    if opt_weight:
        optimizer.load_state_dict(opt_weight)

    lr_scheduler = None

    if config.verbose >= 2:
        print(model)
        print(crit)
        print(optimizer)

    trainer = Trainer(IgniteEngine, config)
    trainer.train(
        model, crit, optimizer,
        train_loader=loader.train_iter,
        valid_loader=loader.valid_iter,
        src_vocab=loader.src.vocab,
        tgt_vocab=loader.tgt.vocab,
        n_epochs=config.n_epochs,
        lr_scheduler=lr_scheduler
    )
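get_model, get_crit, and get_optimizer are factory helpers defined elsewhere in the project. As an illustration of the loss side, here is a minimal sketch of a get_crit that zeroes out the PAD token, assuming the decoder emits log-probabilities (so nn.NLLLoss applies); the actual helper may differ.

import torch
import torch.nn as nn

def get_crit(output_size, pad_index):
    # Sketch: weight every output class equally except PAD, which gets
    # weight 0 so padded time-steps contribute nothing to the loss.
    loss_weight = torch.ones(output_size)
    loss_weight[pad_index] = 0.
    return nn.NLLLoss(weight=loss_weight, reduction='sum')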
def main(_):
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    is_training = (FLAGS.mode == 'train')

    with tf.Session(config=config) as sess:
        copy_file(save_file_dir)
        dataloader = DataLoader(FLAGS.dir)
        model = SeqUnit(FLAGS, scope_name="seq2seq", is_training=is_training)
        sess.run(tf.global_variables_initializer())

        if FLAGS.load != '0':
            tvars = tf.trainable_variables()
            model.load(save_dir)
            # Initialize each exponential-moving-average shadow variable from
            # the freshly restored weights.
            sess.run(tf.group(*(tf.assign(model.ema.average(var), var) for var in tvars)))

        if FLAGS.mode == 'train':
            train(sess, dataloader, model)
        elif FLAGS.mode == 'visualize':
            visualize_attn(sess, dataloader, model)
        else:
            evaluate(sess, dataloader, model, save_dir, mode='test')
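FLAGS here is TensorFlow 1.x's flag object. The flags that main() reads (mode, dir, load) might be declared roughly as follows; this is only a sketch, and the default values are illustrative rather than taken from the original script.

import tensorflow as tf

# Sketch of the flag declarations main() depends on; defaults are illustrative.
tf.app.flags.DEFINE_string('mode', 'train', 'train / test / visualize')
tf.app.flags.DEFINE_string('dir', 'processed_data', 'directory of the preprocessed data')
tf.app.flags.DEFINE_string('load', '0', "checkpoint id to restore; '0' trains from scratch")
FLAGS = tf.app.flags.FLAGS

if __name__ == '__main__':
    tf.app.run()  # parses the flags, then calls main(_)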
if __name__ == '__main__':
    sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())

    config = define_argparser()

    '''
    The trained model, its hyper-parameters, and the vocabularies were all
    saved in a single checkpoint, so they can be loaded back and reused as-is.
    '''
    saved_data = torch.load(
        config.model_fn,
        map_location='cpu',
    )
    train_config = saved_data['config']
    src_vocab, tgt_vocab = get_vocabs(train_config, config, saved_data)

    loader = DataLoader()
    loader.load_vocab(src_vocab, tgt_vocab)
    input_size, output_size = len(loader.src.vocab), len(loader.tgt.vocab)
    model = get_model(input_size, output_size, train_config)

    if config.gpu_id >= 0:
        model.cuda(config.gpu_id)

    with torch.no_grad():
        for lines in read_text(batch_size=config.batch_size):
            '''
            Because of how the recurrent network builds its mini-batch tensor,
            sentences must be sorted by length (longest first). That scrambles
            the input order, so original_indice is kept so the original order
            can be restored afterwards.
            '''
            lengths = [len(line) for line in lines]
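The sort-and-restore trick described in the comment above can be made concrete. The following is a minimal sketch with hypothetical helper names sort_by_length and restore_order; the script's actual helpers are not shown in this excerpt.

def sort_by_length(lines):
    # Sort sentences by length, longest first, while remembering each
    # sentence's original position (original_indice) so the order can be
    # undone after translation.
    lengths = [len(line) for line in lines]
    original_indice = list(range(len(lines)))
    sorted_tuples = sorted(
        zip(lines, lengths, original_indice),
        key=lambda x: x[1],
        reverse=True,
    )
    sorted_lines = [line for line, _, _ in sorted_tuples]
    original_indice = [idx for _, _, idx in sorted_tuples]
    return sorted_lines, original_indice

def restore_order(outputs, original_indice):
    # Put each output back at the position its input sentence came from.
    restored = [None] * len(outputs)
    for output, idx in zip(outputs, original_indice):
        restored[idx] = output
    return restored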