示例#1
0
                           pad_token=0,
                           use_vocab=False,
                           sequential=True)
        GRAPH = GraphField(batch_first=True)

        DOC.vocab = torch.load(args.vocab)
        print('vocab {} loaded'.format(args.vocab))
        args.__dict__.update({'doc_vocab': len(DOC.vocab)})

        args_str = json.dumps(args.__dict__, indent=4, sort_keys=True)
        print(args_str)

        test_data = DocDataset(path=args.test,
                               text_field=DOC,
                               order_field=ORDER,
                               graph_field=GRAPH)
        test_real = DocIter(test_data,
                            1,
                            device="cuda" if args.gpu else "cpu",
                            batch_size_fn=None,
                            train=False,
                            repeat=False,
                            shuffle=False,
                            sort=False)

        print('{} Load data done'.format(curtime()))
        start = time.time()
        decode(args, test_real, (DOC, ORDER), checkpoint)
        print('{} Decode done, time {} mins'.format(
            curtime(), (time.time() - start) / 60))
示例#2
0
def _build_fields():
    """Create the document, order and graph fields shared by training and decoding."""
    # NOTE(review): the original comment here read "special process, shuffle each
    # document" — presumably done inside DocField/DocDataset; not visible in this file.
    doc_field = DocField(batch_first=True, include_lengths=True)
    order_field = data.Field(batch_first=True, include_lengths=True, pad_token=0,
                             use_vocab=False, sequential=True)
    graph_field = GraphField(batch_first=True)
    return doc_field, order_field, graph_field


def _attach_vocab(args, doc_field):
    """Load the vocabulary at ``args.vocab`` onto ``doc_field`` and store its size in ``args.doc_vocab``."""
    # NOTE(review): torch.load unpickles its input; only load vocab files you trust.
    doc_field.vocab = torch.load(args.vocab)
    print('vocab {} loaded'.format(args.vocab))
    args.doc_vocab = len(doc_field.vocab)


def _run_training(args):
    """Build the train/dev iterators and call ``train`` (modes 'train' and 'example')."""
    checkpoint = None
    if args.load_from is not None and len(args.load_from) == 1:
        load_from = args.load_from[0]
        print('{} load the checkpoint from {} for initilize or resume'.
              format(curtime(), load_from))
        # NOTE(review): torch.load unpickles its input; only load trusted checkpoints.
        checkpoint = torch.load(load_from, map_location='cpu')

    # When not resuming (initialize only), just the model parameters are needed,
    # so args are left as given on the command line.
    if args.resume:
        if checkpoint is None:
            # Previously this crashed with "'NoneType' object is not subscriptable".
            raise RuntimeError(
                'resume requires exactly one checkpoint in load_from')
        print('update args from checkpoint')
        load_dict = checkpoint['args'].__dict__
        # These names are passed to override() as exceptions — presumably they
        # keep their current values instead of the checkpointed ones.
        except_name = ['mode', 'resume', 'maximum_steps']
        override(args, load_dict, tuple(except_name))

    # Resolve output directories under main_path and make sure they exist.
    main_path = Path(args.main_path)
    model_path = main_path / args.model_path
    decoding_path = main_path / args.decoding_path
    for path in (model_path, decoding_path):
        path.mkdir(parents=True, exist_ok=True)
    args.model_path = str(model_path)
    args.decoding_path = str(decoding_path)

    # '[time]' is a placeholder that is replaced by a UTC timestamp.
    if args.model == '[time]':
        args.model = time.strftime("%m.%d_%H.%M.", time.gmtime())

    # setup random seeds
    set_seeds(args.seed)

    doc_field, order_field, graph_field = _build_fields()
    train_data = DocDataset(path=args.corpus, text_field=doc_field,
                            order_field=order_field, graph_field=graph_field)
    dev_data = DocDataset(path=args.valid, text_field=doc_field,
                          order_field=order_field, graph_field=graph_field)

    # The vocabulary is attached after the datasets are built, as in the original.
    _attach_vocab(args, doc_field)

    train_real = DocIter(train_data, args.batch_size, device='cuda',
                         train=True,
                         shuffle=True,
                         sort_key=lambda x: len(x.doc))
    # Validation runs with a batch size of 1, unshuffled and unsorted.
    dev_real = DocIter(dev_data, 1, device='cuda', batch_size_fn=None,
                       train=False, repeat=False, shuffle=False, sort=False)

    args_str = json.dumps(args.__dict__, indent=4, sort_keys=True)
    print(args_str)

    print('{} Start training'.format(curtime()))
    train(args, train_real, dev_real, (doc_field, order_field, graph_field), checkpoint)


def _run_decoding(args):
    """Load the best checkpoint, build the test iterator and call ``decode``."""
    # The None check mirrors the training path; without it a missing load_from
    # raised an opaque TypeError from len(None) instead of the intended error.
    if args.load_from is None or len(args.load_from) != 1:
        raise RuntimeError('must load model')

    load_from = '{}.coqa_best.pt'.format(args.load_from[0])
    print('{} load the best checkpoint from {}'.format(curtime(), load_from))
    # NOTE(review): torch.load unpickles its input; only load trusted checkpoints.
    checkpoint = torch.load(load_from, map_location='cpu')

    # Args come from the checkpoint; the names below are passed to override()
    # as exceptions — presumably they keep their current values.
    print('update args from checkpoint')
    load_dict = checkpoint['args'].__dict__
    except_name = ['mode', 'load_from', 'test', 'writetrans', 'beam_size', 'batch_size']
    override(args, load_dict, tuple(except_name))

    print('{} Load test set'.format(curtime()))

    doc_field, order_field, graph_field = _build_fields()
    _attach_vocab(args, doc_field)

    args_str = json.dumps(args.__dict__, indent=4, sort_keys=True)
    print(args_str)

    test_data = DocDataset(path=args.test, text_field=doc_field,
                           order_field=order_field, graph_field=graph_field)
    # Decoding runs with a batch size of 1, unshuffled and unsorted.
    test_real = DocIter(test_data, 1, device='cuda', batch_size_fn=None,
                        train=False, repeat=False, shuffle=False, sort=False)

    print('{} Load data done'.format(curtime()))
    start = time.time()
    decode(args, test_real, (doc_field, order_field), checkpoint)
    print('{} Decode done, time {} mins'.format(curtime(), (time.time() - start) / 60))


def run_model(args):
    """Train a model or decode a test set, depending on ``args.mode``.

    When ``args.mode`` is ``'train'`` or ``'example'``, an optional checkpoint
    is loaded from ``args.load_from``, the output directories are created, the
    random seeds are set, train/dev iterators are built and ``train`` is called.
    For any other mode, ``<args.load_from[0]>.coqa_best.pt`` is loaded, a test
    iterator is built and ``decode`` is called.

    Args:
        args: Namespace-like object holding the run configuration. It is
            mutated in place: ``doc_vocab`` is always set; in training mode
            ``model_path``, ``decoding_path`` and possibly ``model`` are
            rewritten; ``override`` may change further attributes.

    Raises:
        RuntimeError: In decoding mode when ``args.load_from`` is not a
            one-element sequence, or in training mode when ``args.resume`` is
            set but no checkpoint was loaded.
    """
    if args.mode == 'train' or args.mode == 'example':
        _run_training(args)
    else:
        _run_decoding(args)