def train(args):
    batch_size = 16
    output_path = base_output_path
    dataset = args.dataset
    data_path = args.data_path + dataset + '/' + dataset

    tokenizer, vocab2id, id2vocab = bert_tokenizer()
    detokenizer = bert_detokenizer()

    train_samples = torch.load(data_path + '.pkl')
    marco_train_size = len(train_samples)
    train_dataset = GLKSDataset(
        train_samples, None, None, None, None, None, None, None, None,
        sample_tensor=torch.load(data_path + '.GLKS.dataset.pkl'))

    model = GLKS(min_window_size, num_windows, embedding_size, hidden_size,
                 vocab2id, id2vocab, max_dec_len=max_target_length,
                 beam_width=1, emb_matrix=None)
    init_params(model)

    model_bp_count = (epoch * marco_train_size) / (4 * batch_size * accumulation_steps)
    model_optimizer = optim.Adam(model.parameters(), lr=2.5e-4)
    model_scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
        model_optimizer, 2000, int(model_bp_count) + 100)

    model_trainer = CumulativeTrainer(
        model, tokenizer, detokenizer, args.local_rank, 4,
        accumulation_steps=accumulation_steps)

    for i in range(epoch):
        model_trainer.train_epoch('ds_mle_mce_train', train_dataset, collate_fn,
                                  batch_size, i, model_optimizer, model_scheduler)
        model_trainer.serialize(i, output_path=output_path)
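
# A minimal sketch of how this train() might be driven from the command line.
# It only covers the attributes read from `args` above (dataset, data_path,
# local_rank); the remaining settings (batch size, epoch, min_window_size,
# num_windows, embedding_size, hidden_size, max_target_length,
# accumulation_steps, base_output_path) are assumed to be module-level
# constants defined elsewhere. Flag names and defaults are illustrative
# assumptions, not the repository's actual CLI.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='marco')       # assumed default
    parser.add_argument('--data_path', type=str, default='dataset/')  # assumed default
    parser.add_argument('--local_rank', type=int, default=-1)         # set by torch.distributed.launch
    args = parser.parse_args()
    train(args)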
def train(args):
    tokenizer, vocab2id, id2vocab = bert_tokenizer()
    detokenizer = bert_detokenizer()

    data_path = os.path.join(args.data_path, args.dataset + '/')
    train_samples = torch.load(
        os.path.join(data_path, args.dataset + '.train.pkl'))
    train_size = len(train_samples)
    train_dataset = CaSEDataset(
        train_samples, None, None, None, None, None, None, None, None, None, None,
        sample_tensor=torch.load(
            os.path.join(data_path, args.dataset + '.train.CaSE.dataset.pkl')))

    model = CaSE(args.max_span_size, args.max_target_length, id2vocab, vocab2id,
                 args.hidden_size)
    init_params(model)

    model_bp_count = (args.epoch * train_size) / (
        args.num_gpu * args.batch_size * args.accumulation_steps)
    model_optimizer = optim.Adam(model.parameters(), lr=2.5e-4)
    model_scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
        model_optimizer, 2000, int(model_bp_count) + 100)

    model_trainer = CumulativeTrainer(
        model, tokenizer, detokenizer, args.local_rank, args.num_gpu,
        accumulation_steps=args.accumulation_steps)

    for i in range(args.epoch):
        model_trainer.train_epoch('train', train_dataset, collate_fn,
                                  args.batch_size, i, model_optimizer, model_scheduler)
        model_trainer.serialize(i, output_path=args.output_path)
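
# The scheduler budget above (model_bp_count) is the number of optimizer updates over
# the whole run: one update per (num_gpu * batch_size * accumulation_steps) samples,
# with int(model_bp_count) + 100 passed to the scheduler as the total step count.
# A small illustrative helper mirroring that formula; the name, the example numbers,
# and the helper itself are assumptions for exposition, not repository code.
def _estimated_update_steps(epochs, train_size, num_gpu, batch_size, accumulation_steps):
    # e.g. 10 epochs, 80,000 samples, 4 GPUs, batch 8, accumulation 4
    # -> 10 * 80000 / (4 * 8 * 4) = 6250 optimizer updates
    return int(epochs * train_size / (num_gpu * batch_size * accumulation_steps))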
def train(args):
    batch_size = args.train_batch_size
    ratio = args.profile_dropout_ratio
    policy = args.neighbor_policy
    task_dir = '%s/%s-%s' % (src, task, policy)

    drop_attr = ''
    if args.keep_attributes is not None:
        for k in _keys:
            if k not in args.keep_attributes:
                drop_attr += '_%s' % k

    _, _, _, kb_vocab = torch.load('%s/kbs.pkl' % task_dir)
    candidates = torch.load('%s/candidates.pkl' % task_dir)
    candidate_tensor = torch.load('%s/candidate.ctds.pkl' % task_dir)
    # candidate_tensor = candidate_tensor.cuda() if torch.cuda.is_available() else candidate_tensor
    train_samples = torch.load('%s/train.pkl' % task_dir)
    train_sample_tensor = torch.load('%s/train.ctds-%s%s.pkl' % (task_dir, ratio, drop_attr))
    meta_data = torch.load('%s/meta.pkl' % task_dir)
    vocab2id, id2vocab = torch.load('%s/vocab.pkl' % task_dir)
    tokenizer = babi_tokenizer
    print('Item size', len(vocab2id))

    train_dataset = CTDSDataset(
        train_samples[:cut_data_index], candidates, meta_data, tokenizer,
        vocab2id, id2vocab,
        sample_tensor=train_sample_tensor[:cut_data_index],
        train_sample_tensor=train_sample_tensor)

    if args.train_epoch_start > 0:
        # load a model and continue to train
        file = os.path.join(output_model_path, str(args.train_epoch_start) + '.pkl')
        if os.path.exists(file):
            model = CTDS(hidden_size, vocab2id, id2vocab, candidate_tensor, meta_data)
            model.load_state_dict(torch.load(file, map_location='cpu'))
        else:
            print('ERR: do not have %s' % args.train_epoch_start)
    else:
        model = CTDS(hidden_size, vocab2id, id2vocab, candidate_tensor, meta_data)
        init_params(model)

    train_size = len(train_dataset)
    model_bp_count = (epoches * train_size) / (
        4 * batch_size * accumulation_steps)  # global_batch_step

    model_optimizer = optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    # model_optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # model_optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, nesterov=True)
    if args.warmup > 0:
        model_scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
            model_optimizer, round(args.warmup * model_bp_count), int(model_bp_count) + 100)
    else:
        model_scheduler = None

    model_trainer = CumulativeTrainer(
        model, tokenizer, None, args.local_rank, 4,
        accumulation_steps=accumulation_steps,
        max_grad_norm=args.max_grad_norm,
        save_data_attributes=save_data_attributes)

    for i in range(args.train_epoch_start, epoches):
        model_trainer.train_epoch('train', train_dataset, collate_fn,
                                  batch_size, i, model_optimizer, model_scheduler)
        model_trainer.serialize(i, output_path=output_model_path)
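
# A minimal sketch of the argument surface this train() reads from `args`. Flag names
# mirror the attributes accessed above; the defaults are illustrative assumptions, and
# the remaining settings (src, task, _keys, cut_data_index, hidden_size, epoches,
# accumulation_steps, output_model_path, save_data_attributes) are assumed to be
# module-level constants defined elsewhere in this script.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--train_batch_size', type=int, default=32)
    parser.add_argument('--profile_dropout_ratio', type=float, default=0.0)
    parser.add_argument('--neighbor_policy', type=str, default='default')
    parser.add_argument('--keep_attributes', type=str, nargs='*', default=None)
    parser.add_argument('--train_epoch_start', type=int, default=0)  # >0 resumes from <output_model_path>/<N>.pkl
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--weight_decay', type=float, default=0.0)
    parser.add_argument('--warmup', type=float, default=0.1)         # fraction of total updates used for warmup
    parser.add_argument('--max_grad_norm', type=float, default=1.0)
    parser.add_argument('--local_rank', type=int, default=-1)        # set by torch.distributed.launch
    args = parser.parse_args()
    train(args)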