def __init__(self, model, train_path, val_path, test_path, ckpt, args):
    """Build the data loaders, attach the model and remember the checkpoint path.

    Args:
        model: Model to train/evaluate. ``model.rel2id`` and
            ``model.sentence_encoder.tokenize`` are read whenever a loader
            is built.
        train_path: Training data path; when ``None`` no ``train_loader``
            attribute is created.
        val_path: Validation data path; when ``None`` no ``val_loader``
            attribute is created.
        test_path: Test data path; when ``None`` no ``test_loader``
            attribute is created.
        ckpt: Path prefix. The ``.pkl`` paths handed to the loader builders
            are formed as ``ckpt + '/data/<name>.pkl'``; the value is also
            stored as ``self.ckpt``.
        args: Options object. ``max_epoch``, ``bag_size`` and ``use_gpu``
            are read here and the object is forwarded to the builders.
    """
    super().__init__()
    self.max_epoch = args.max_epoch
    self.bag_size = args.bag_size
    self.args = args

    def make_loader(builder, data_path, pkl_suffix, as_bag):
        # Every loader shares the same relation map, tokenizer and options;
        # only the builder, the paths and the bagging mode differ.
        return builder(
            data_path,
            ckpt + pkl_suffix,
            model.rel2id,
            model.sentence_encoder.tokenize,
            args,
            entpair_as_bag=as_bag,
        )

    # Training data goes through the federated builder and is not bagged by
    # entity pair; validation and test data are.
    if train_path is not None:
        self.train_loader = make_loader(
            build_fed_data, train_path, '/data/fed_train.pkl', False)
    if val_path is not None:
        self.val_loader = make_loader(
            build_data, val_path, '/data/dev.pkl', True)
    if test_path is not None:
        self.test_loader = make_loader(
            build_data, test_path, '/data/test.pkl', True)

    # Model (only the wrapped model is moved to the GPU here).
    self.model = model
    if self.args.use_gpu:
        self.model.cuda()

    # Checkpoint location.
    self.ckpt = ckpt
type=int, default=1)
# NOTE(review): the line above is the tail of an add_argument(...) call that
# begins outside this view; it is kept verbatim.

# Learning options: learning rates, decay settings, gradient-norm limit and
# the optimizer choice (restricted to 'Adam' or 'AdamW').
learn_arg.add_argument('--decoder_lr', type=float, default=2e-5)
learn_arg.add_argument('--encoder_lr', type=float, default=1e-5)
learn_arg.add_argument('--lr_decay', type=float, default=0.01)
learn_arg.add_argument('--weight_decay', type=float, default=1e-5)
learn_arg.add_argument('--max_grad_norm', type=float, default=0)
learn_arg.add_argument('--optimizer', type=str, default='AdamW', choices=['Adam', 'AdamW'])

# Evaluation options.
evaluation_arg = add_argument_group('Evaluation')
evaluation_arg.add_argument('--n_best_size', type=int, default=100)
evaluation_arg.add_argument('--max_span_length', type=int, default=12) # NYT webNLG 10 (original note; presumably the value used for those datasets - TODO confirm)

# Miscellaneous options: cache refresh flag, GPU usage, GPU index and seed.
misc_arg = add_argument_group('MISC')
misc_arg.add_argument('--refresh', type=str2bool, default=False)
misc_arg.add_argument('--use_gpu', type=str2bool, default=True)
misc_arg.add_argument('--visible_gpu', type=int, default=1)
misc_arg.add_argument('--random_seed', type=int, default=1)

# get_args() is defined elsewhere; it returns a pair, and the second element
# (unparsed) is not used in this block.
args, unparsed = get_args()
# Environment values must be strings, hence str() around the integer GPU index.
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.visible_gpu)
# Echo every parsed option as "name : value".
for arg in vars(args):
    print(arg, ":", getattr(args, arg))
# Seed first, then build data, model and trainer, and run training.
set_seed(args.random_seed)
data = build_data(args)
# The model is sized by the number of entries in the data's relation alphabet.
model = SetPred4RE(args, data.relational_alphabet.size())
trainer = Trainer(model, data, args)
trainer.train_model()
default=4)
# NOTE(review): the line above is the tail of an add_argument(...) call that
# begins outside this view; it is kept verbatim.

# Learning options: learning rates, decay settings, gradient-norm limit and
# the optimizer choice (restricted to 'Adam' or 'AdamW').
learn_arg.add_argument('--decoder_lr', type=float, default=2e-5)
learn_arg.add_argument('--encoder_lr', type=float, default=1e-5)
learn_arg.add_argument('--lr_decay', type=float, default=0.01)
learn_arg.add_argument('--weight_decay', type=float, default=1e-5)
learn_arg.add_argument('--max_grad_norm', type=float, default=0)
learn_arg.add_argument('--optimizer', type=str, default='AdamW', choices=['Adam', 'AdamW'])

# Evaluation options.
evaluation_arg = add_argument_group('Evaluation')
evaluation_arg.add_argument('--n_best_size', type=int, default=100)
evaluation_arg.add_argument('--max_span_length', type=int, default=12) # NYT webNLG 10 (original note; presumably the value used for those datasets - TODO confirm)

# Miscellaneous options: cache refresh flag, GPU usage, GPU selection and seed.
misc_arg = add_argument_group('MISC')
misc_arg.add_argument('--refresh', type=str2bool, default=False)
misc_arg.add_argument('--use_gpu', type=str2bool, default=True)
# Parsed as a string, so it can be assigned to the environment unchanged.
misc_arg.add_argument('--visible_gpu', type=str, default='0')
misc_arg.add_argument('--random_seed', type=int, default=1)

# get_args() is defined elsewhere; it returns a pair, and the second element
# (unparsed) is not used in this block.
args, unparsed = get_args()
os.environ["CUDA_VISIBLE_DEVICES"] = args.visible_gpu
# Echo every parsed option as "name : value".
for arg in vars(args):
    print(arg, ":", getattr(args, arg))
# Seed first, then build data, model and trainer.
set_seed(args.random_seed)
data = build_data(args) # contains the training, validation and test set data
# The model is sized by the number of entries in the data's relation alphabet.
model = SetPred4RE(args, data.relational_alphabet.size())
trainer = Trainer(model, data, args)
# Train, then run prediction with the same trainer.
trainer.train_model()
trainer.predict_model()
def __init__(self, model, train_path, val_path, test_path, ckpt, args):
    """Build the data loaders, loss criterion and optimizer for ``model``.

    Args:
        model: Model to optimize. ``model.rel2id`` and
            ``model.sentence_encoder.tokenize`` are read whenever a loader
            is built.
        train_path: Training data path; when ``None`` no ``train_loader``
            attribute is created.
        val_path: Validation data path; when ``None`` no ``val_loader``
            attribute is created.
        test_path: Test data path; when ``None`` no ``test_loader``
            attribute is created.
        ckpt: Path prefix. The ``.pkl`` paths handed to ``build_data`` are
            formed as ``ckpt + '/data/<name>.pkl'``; the value is also
            stored as ``self.ckpt``.
        args: Options object. Reads ``max_epoch``, ``bag_size``,
            ``loss_weight``, ``optimizer``, ``lr``, ``weight_decay`` and
            ``use_gpu``; it is also forwarded to ``build_data``.

    Raises:
        ValueError: If ``args.optimizer`` is not ``'SGD'``, ``'Adam'`` or
            ``'AdamW'``.
    """
    super().__init__()
    self.max_epoch = args.max_epoch
    self.bag_size = args.bag_size
    self.args = args

    # Load data. Training data is not bagged by entity pair; validation and
    # test data are.
    if train_path is not None:
        self.train_loader = build_data(
            train_path,
            ckpt + '/data/train.pkl',
            model.rel2id,
            model.sentence_encoder.tokenize,
            args,
            entpair_as_bag=False)
    if val_path is not None:
        self.val_loader = build_data(
            val_path,
            ckpt + '/data/dev.pkl',
            model.rel2id,
            model.sentence_encoder.tokenize,
            args,
            entpair_as_bag=True)
    if test_path is not None:
        self.test_loader = build_data(
            test_path,
            ckpt + '/data/test.pkl',
            model.rel2id,
            model.sentence_encoder.tokenize,
            args,
            entpair_as_bag=True)

    # Model
    self.model = model

    # Criterion. The class weights come from the training loader, so
    # args.loss_weight can only be used together with a train_path.
    if args.loss_weight:
        self.criterion = nn.CrossEntropyLoss(weight=self.train_loader.weight)
    else:
        self.criterion = nn.CrossEntropyLoss()

    # Params and optimizer
    if args.optimizer == 'SGD':
        self.optimizer = optim.SGD(
            self.model.parameters(), args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'Adam':
        self.optimizer = optim.Adam(
            self.model.parameters(), args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'AdamW':
        # NOTE(review): transformers' own AdamW is deprecated in newer
        # releases - confirm the pinned transformers version still ships it.
        from transformers import AdamW
        named_params = list(self.model.named_parameters())
        # Parameters whose name contains one of these substrings are
        # excluded from weight decay.
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        # 'ori_lr' is an extra per-group key, not an AdamW option; presumably
        # it is read elsewhere to rescale the learning rate - TODO confirm.
        grouped_params = [{
            'params': [p for n, p in named_params
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': args.weight_decay,
            'lr': args.lr,
            'ori_lr': args.lr
        }, {
            'params': [p for n, p in named_params
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0,
            'lr': args.lr,
            'ori_lr': args.lr
        }]
        self.optimizer = AdamW(grouped_params, correct_bias=False)
    else:
        # The message lists the values the branches above actually accept.
        raise ValueError(
            f"Invalid optimizer {args.optimizer!r}. "
            "Must be 'SGD', 'Adam' or 'AdamW'.")

    # Cuda: move this whole object to the GPU; cuda() comes from the base
    # class (presumably torch.nn.Module - TODO confirm).
    if self.args.use_gpu:
        self.cuda()

    # Ckpt
    self.ckpt = ckpt