def evaluate(self, mode='test'):
    if mode == 'dev':
        test_loader = DataLoader(self.val_set, batch_size=self.args.test_batch_size)
    elif mode == 'test':
        test_loader = DataLoader(self.test_set, batch_size=self.args.test_batch_size)
    else:
        raise ValueError('Invalid Mode!!!')

    self.model.eval()
    nb_right_all, nb_pred_all, nb_gold_all = 0, 0, 0
    with torch.no_grad():
        for i, batcher in enumerate(test_loader):
            batch = batch_variable(batcher, self.vocabs)
            batch.to_device(self.args.device)
            pred_score = self.model(batch.wd_ids, batch.ch_ids, batch.tag_ids, batch.bert_inps)
            sent_lens = batch.wd_ids.gt(0).sum(dim=1)
            gold_res = self.ner_gold(batch.ner_ids, sent_lens, self.vocabs['ner'])
            pred_res = self.ner_pred(pred_score, sent_lens, self.vocabs['ner'])
            nb_right, nb_pred, nb_gold = self.calc_acc(pred_res, gold_res, return_prf=False)
            nb_right_all += nb_right
            nb_pred_all += nb_pred
            nb_gold_all += nb_gold

    p, r, f = self.calc_prf(nb_right_all, nb_pred_all, nb_gold_all)
    return dict(p=p, r=r, f=f)

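The evaluation above relies on a calc_prf helper that is not shown in this section. A minimal sketch, assuming it simply turns the accumulated counts into precision, recall, and F1 with zero-division guards:

# Minimal sketch (assumption, helper not shown in the source): precision,
# recall and F1 from the accumulated match counts, guarding against
# division by zero.
def calc_prf(self, nb_right, nb_pred, nb_gold):
    p = nb_right / nb_pred if nb_pred > 0 else 0.0
    r = nb_right / nb_gold if nb_gold > 0 else 0.0
    f = 2 * p * r / (p + r) if (p + r) > 0 else 0.0
    return p, r, f
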
def read_data(self):
    """Read and prepare the train and eval datasets."""
    if self.resume is None:
        # create language objects
        self.src_language = Language(self.params.model.source)
        self.tgt_language = Language(self.params.model.target)

        # read vocabulary from file
        self.src_language.read_vocabulary(Path(self.params.data.src_vocab))
        self.tgt_language.read_vocabulary(Path(self.params.data.tgt_vocab))
    else:
        # load from checkpoint file
        self.checkpoint = torch.load(Path(self.resume), map_location=self.device)
        self.model = self.checkpoint["model"]

    # load eval dataset
    self.eval_data = DataLoader.from_files(
        self.params.data.src_eval, self.params.data.tgt_eval,
        self.params.model.max_length, self.params.training.batch_size)

    # load train dataset
    if self.batched:
        self.train_data = BatchedData(
            Path(self.batched), self.params.model.max_length,
            self.params.training.batch_size)
    else:
        self.train_data = DataLoader.from_files(
            self.params.data.src_train, self.params.data.tgt_train,
            self.params.model.max_length, self.params.training.batch_size)

def train_iter(self, ep, train_set, optimizer):
    t1 = time.time()
    train_acc, train_loss = 0., 0.
    train_loader = DataLoader(train_set, batch_size=self.args.batch_size, shuffle=True)
    self.model.train()
    for i, batcher in enumerate(train_loader):
        batch = batch_variable(batcher, self.vocabs)
        batch.to_device(self.args.device)
        pred = self.model(batch.x, batch.nx, batch.ew)
        loss = F.nll_loss(pred, batch.y)
        loss.backward()
        nn_utils.clip_grad_norm_(filter(lambda p: p.requires_grad, self.model.parameters()),
                                 max_norm=self.args.grad_clip)
        optimizer.step()
        self.model.zero_grad()

        loss_val = loss.data.item()
        train_loss += loss_val
        train_acc += (pred.data.argmax(dim=-1) == batch.y).sum().item()
        logger.info('[Epoch %d] Iter%d time cost: %.2fs, lr: %.6f, train acc: %.4f, train loss: %.4f' % (
            ep, i + 1, (time.time() - t1), optimizer.get_lr(),
            train_acc / len(train_set), loss_val))

    return train_loss / len(train_set), train_acc / len(train_set)

def batch_dataset(args):
    # read data from configuration file
    config = Parameters.from_config(args.path)

    # load entire dataset
    train_data = DataLoader.from_files(
        config.data.src_train, config.data.tgt_train,
        config.model.max_length, config.training.batch_size)

    outputfile = Path(args.output)
    with open(outputfile, "w", encoding="utf-8") as ofile:
        for i, batch in enumerate(train_data):
            for src, tgt in zip(*batch):
                s_sen = " ".join(src)
                t_sen = " ".join(tgt)
                ofile.write(f"{s_sen}\t{t_sen}\n")

            # print progress
            print(f"Batching dataset: {i}/{len(train_data)}", end="\r")

    print(" " * 50, end="\r")
    print("Batching dataset: complete")

def eval(self, test_set):
    nb_correct, nb_total = 0, 0
    test_loader = DataLoader(test_set, batch_size=self.args.test_batch_size)
    self.model.eval()
    with torch.no_grad():
        for i, batcher in enumerate(test_loader):
            batch = batch_variable(batcher, self.vocabs)
            batch.to_device(self.args.device)
            pred = self.model(batch.x, batch.nx, batch.ew)
            nb_correct += (pred.data.argmax(dim=-1) == batch.y).sum().item()
            nb_total += len(batch.y)
    return nb_correct / nb_total

def eval(self, task_id, test_data):
    print(f'evaluating {get_task(task_id)} task ...')
    nb_correct, nb_total = 0, 0
    self.model.eval()
    test_loader = DataLoader(test_data, batch_size=self.args.test_batch_size)
    with torch.no_grad():
        for i, batcher in enumerate(test_loader):
            batch = batch_variable(batcher, self.wd_vocab)
            batch.to_device(self.args.device)
            task_logits, share_logits, _ = self.model(task_id, batch.wd_ids)
            nb_correct += (task_logits.data.argmax(dim=-1) == batch.lbl_ids).sum().item()
            nb_total += len(batch.lbl_ids)

    acc = nb_correct / nb_total
    # err = 1 - acc
    return acc

def train_iter(self, ep, task_id, train_data, optimizer):
    t1 = time.time()
    train_acc, train_loss = 0., 0.
    self.model.train()
    train_loader = DataLoader(train_data, batch_size=self.args.batch_size, shuffle=True)
    total_step = 200 * len(train_loader)
    step = 0
    for i, batcher in enumerate(train_loader):
        batch = batch_variable(batcher, self.wd_vocab)
        batch.to_device(self.args.device)
        # adversarial weight for the shared (task-discriminator) branch
        adv_lmbd = self.lambda_(step, total_step)
        task_logits, share_logits, diff_loss = self.model(task_id, batch.wd_ids, adv_lmbd)
        loss_task = F.cross_entropy(task_logits, batch.lbl_ids)
        loss_share = F.cross_entropy(share_logits, batch.task_ids)
        loss = loss_task + self.args.adv_loss_w * loss_share + self.args.diff_loss_w * diff_loss
        loss.backward()
        nn_utils.clip_grad_norm_(filter(lambda p: p.requires_grad, self.model.parameters()),
                                 max_norm=self.args.grad_clip)
        optimizer.step()
        self.model.zero_grad()

        loss_val = loss.data.item()
        train_loss += loss_val
        train_acc += (task_logits.data.argmax(dim=-1) == batch.lbl_ids).sum().item()
        logger.info('[Epoch %d][Task %s] Iter%d time cost: %.2fs, lr: %.6f, train acc: %.4f, train loss: %.4f' % (
            ep, get_task(task_id), i + 1, (time.time() - t1), optimizer.get_lr(),
            train_acc / len(train_data), loss_val))
        step += 1

    return train_loss / len(train_data), train_acc / len(train_data)

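The adversarial multi-task loop above calls self.lambda_(step, total_step), which is not defined in this section. A minimal sketch, assuming the common DANN-style ramp-up of the adversarial weight from 0 to 1 over training progress (the exact schedule used by the original code may differ):

import math

# Minimal sketch (assumption): DANN-style adversarial weight ramp-up,
# lambda = 2 / (1 + exp(-10 * p)) - 1, where p is training progress in [0, 1].
def lambda_(self, step, total_step):
    p = step / max(1, total_step)
    return 2.0 / (1.0 + math.exp(-10.0 * p)) - 1.0
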
def train():
    strategy = tf.distribute.MirroredStrategy()

    file_names = []
    with open(os.path.join(config.data_dir, 'train.txt')) as f:
        for file_name in f.readlines():
            image_path = os.path.join(config.data_dir, config.image_dir, file_name.rstrip() + '.jpg')
            label_path = os.path.join(config.data_dir, config.label_dir, file_name.rstrip() + '.xml')
            if os.path.exists(image_path) and os.path.exists(label_path):
                if os.path.exists(os.path.join(config.data_dir, 'TF')):
                    file_names.append(os.path.join(config.data_dir, 'TF', file_name.rstrip() + '.tf'))
                else:
                    file_names.append(file_name.rstrip())

    steps = len(file_names) // config.batch_size
    if os.path.exists(os.path.join(config.data_dir, 'TF')):
        dataset = DataLoader().input_fn(file_names)
    else:
        dataset = input_fn(file_names)
    dataset = strategy.experimental_distribute_dataset(dataset)

    with strategy.scope():
        model = nn.build_model()
        model.summary()
        optimizer = tf.keras.optimizers.Adam(nn.CosineLR(steps), 0.937)

    with strategy.scope():
        loss_object = nn.ComputeLoss()

        def compute_loss(y_true, y_pred):
            total_loss = loss_object(y_pred, y_true)
            return tf.reduce_sum(total_loss) / config.batch_size

    with strategy.scope():
        def train_step(image, y_true):
            with tf.GradientTape() as tape:
                y_pred = model(image, training=True)
                loss = compute_loss(y_true, y_pred)
            variables = model.trainable_variables
            gradients = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(gradients, variables))
            return loss

    with strategy.scope():
        @tf.function
        def distributed_train_step(image, y_true):
            per_replica_losses = strategy.run(train_step, args=(image, y_true))
            return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None)

    def train_fn():
        if not os.path.exists('weights'):
            os.makedirs('weights')
        pb = tf.keras.utils.Progbar(steps, stateful_metrics=['loss'])
        print(f'[INFO] {len(file_names)} data points')
        for step, inputs in enumerate(dataset):
            if step % steps == 0:
                print(f'Epoch {step // steps + 1}/{config.num_epochs}')
                pb = tf.keras.utils.Progbar(steps, stateful_metrics=['loss'])
            step += 1
            image, y_true_1, y_true_2, y_true_3 = inputs
            y_true = (y_true_1, y_true_2, y_true_3)
            loss = distributed_train_step(image, y_true)
            pb.add(1, [('loss', loss)])
            if step % steps == 0:
                model.save_weights(os.path.join("weights", f"model_{config.version}.h5"))
            if step // steps == config.num_epochs:
                sys.exit("--- Stop Training ---")

    train_fn()

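The TensorFlow loop above passes nn.CosineLR(steps) (nn.CosineLrSchedule in the later snippet) to the Adam optimizer, but the schedule itself is not shown. A minimal sketch, assuming a plain cosine decay implemented as a Keras LearningRateSchedule; the constructor arguments here (epochs, lr_max, lr_min) are illustrative, not the project's actual ones:

import math
import tensorflow as tf

# Minimal sketch (assumption): a cosine-decay learning-rate schedule in the
# spirit of nn.CosineLR; lr falls from lr_max to lr_min over all training steps.
class CosineLR(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, steps_per_epoch, epochs=300, lr_max=1e-3, lr_min=1e-6):
        self.total_steps = float(steps_per_epoch * epochs)
        self.lr_max = lr_max
        self.lr_min = lr_min

    def __call__(self, step):
        progress = tf.cast(step, tf.float32) / self.total_steps
        cosine = 0.5 * (1.0 + tf.cos(math.pi * progress))
        return self.lr_min + (self.lr_max - self.lr_min) * cosine
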
def train_eval(self):
    train_loader = DataLoader(self.train_set, batch_size=self.args.batch_size, shuffle=True)
    self.args.max_step = self.args.epoch * (len(train_loader) // self.args.update_step)
    print('max step:', self.args.max_step)
    optimizer = Optimizer(filter(lambda p: p.requires_grad, self.model.parameters()), self.args)
    best_dev_metric, best_test_metric = dict(), dict()
    patient = 0
    for ep in range(1, 1 + self.args.epoch):
        train_loss = 0.
        self.model.train()
        t1 = time.time()
        train_right, train_pred, train_gold = 0, 0, 0
        for i, batcher in enumerate(train_loader):
            batch = batch_variable(batcher, self.vocabs)
            batch.to_device(self.args.device)
            pred_score = self.model(batch.wd_ids, batch.ch_ids, batch.tag_ids, batch.bert_inps)
            loss = self.calc_loss(pred_score, batch.ner_ids)
            loss_val = loss.data.item()
            train_loss += loss_val

            sent_lens = batch.wd_ids.gt(0).sum(dim=1)
            gold_res = self.ner_gold(batch.ner_ids, sent_lens, self.vocabs['ner'])
            pred_res = self.ner_pred(pred_score, sent_lens, self.vocabs['ner'])
            nb_right, nb_pred, nb_gold = self.calc_acc(pred_res, gold_res, return_prf=False)
            train_right += nb_right
            train_pred += nb_pred
            train_gold += nb_gold
            train_p, train_r, train_f = self.calc_prf(train_right, train_pred, train_gold)

            # accumulate gradients over update_step mini-batches before an optimizer step
            if self.args.update_step > 1:
                loss = loss / self.args.update_step
            loss.backward()
            if (i + 1) % self.args.update_step == 0 or (i == self.args.max_step - 1):
                nn_utils.clip_grad_norm_(filter(lambda p: p.requires_grad, self.model.parameters()),
                                         max_norm=self.args.grad_clip)
                optimizer.step()
                self.model.zero_grad()

            logger.info('[Epoch %d] Iter%d time cost: %.2fs, lr: %.6f, train loss: %.3f, P: %.3f, R: %.3f, F: %.3f' % (
                ep, i + 1, (time.time() - t1), optimizer.get_lr(),
                loss_val, train_p, train_r, train_f))

        dev_metric = self.evaluate('dev')
        if dev_metric['f'] > best_dev_metric.get('f', 0):
            best_dev_metric = dev_metric
            test_metric = self.evaluate('test')
            if test_metric['f'] > best_test_metric.get('f', 0):
                # check_point = {'model': self.model.state_dict(), 'settings': args}
                # torch.save(check_point, self.args.model_chkp)
                best_test_metric = test_metric
            patient = 0
        else:
            patient += 1

        logger.info('[Epoch %d] train loss: %.4f, lr: %f, patient: %d, dev_metric: %s, test_metric: %s' % (
            ep, train_loss, optimizer.get_lr(), patient, best_dev_metric, best_test_metric))

        # if patient >= (self.args.patient // 2 + 1):  # if dev performance stalls for several epochs, decay the lr
        #     optimizer.lr_decay(0.95)

        if patient >= self.args.patient:  # early stopping
            break

    logger.info('Final Metric: %s' % best_test_metric)

        image_path = join(config.base_dir, config.image_dir, line.rstrip() + '.jpg')
        label_path = join(config.base_dir, config.label_dir, line.rstrip() + '.xml')
        if exists(image_path) and exists(label_path):
            if exists(join(config.base_dir, 'TF')):
                file_names.append(join(config.base_dir, 'TF', line.rstrip() + '.tf'))
            else:
                file_names.append(line.rstrip())

print(f'[INFO] {len(file_names)} data points')
num_replicas = strategy.num_replicas_in_sync
steps = len(file_names) // config.batch_size
if exists(join(config.base_dir, 'TF')):
    dataset = DataLoader().input_fn(file_names)
else:
    dataset = input_fn(file_names)
dataset = strategy.experimental_distribute_dataset(dataset)

with strategy.scope():
    model = nn.build_model()
    model.summary()
    optimizer = tf.keras.optimizers.Adam(nn.CosineLrSchedule(steps), 0.937)

with strategy.scope():
    loss_object = nn.ComputeLoss()

    def compute_loss(y_true, y_pred):
        total_loss = loss_object(y_pred, y_true)
        return tf.reduce_sum(total_loss) / config.batch_size

def evaluate(self, mode='test'):
    if mode == 'dev':
        test_loader = DataLoader(self.val_set, batch_size=self.args.test_batch_size)
    elif mode == 'test':
        test_loader = DataLoader(self.test_set, batch_size=self.args.test_batch_size)
    else:
        raise ValueError('Invalid Mode!!!')

    self.model.eval()
    rel_vocab = self.vocabs['rel']
    nb_head_gold, nb_head_pred, nb_head_correct = 0, 0, 0
    nb_rel_gold, nb_rel_pred, nb_rel_correct = 0, 0, 0
    with torch.no_grad():
        for i, batcher in enumerate(test_loader):
            batch = batch_variable(batcher, self.vocabs)
            batch.to_device(self.args.device)
            head_score, rel_score = self.model(batch.wd_ids, batch.ch_ids, batch.tag_ids)
            mask = batch.wd_ids.gt(0)
            lens = mask.sum(dim=1)
            graph_pred = self.model.graph_decode(head_score, rel_score, mask)
            pred_deps = self.parse_pred_graph(graph_pred, lens, rel_vocab)
            gold_deps = self.parse_gold_graph(batch.rel_ids, lens, rel_vocab)
            assert len(pred_deps) == len(gold_deps)

            # for deps_p, deps_g in zip(pred_deps, gold_deps):
            #     nb_head_gold += len(deps_g)
            #     nb_rel_gold += len(deps_g)
            #     nb_head_pred += len(deps_p)
            #     nb_rel_pred += len(deps_p)
            #     for dg in deps_g:
            #         for dp in deps_p:
            #             if dg[:-1] == dp[:-1]:
            #                 nb_head_correct += 1
            #                 if dg == dp:
            #                     nb_rel_correct += 1
            #                 break

            for pdeps, gdeps in zip(pred_deps, gold_deps):  # sentence
                assert len(pdeps) == len(gdeps)
                for pdep, gdep in zip(pdeps, gdeps):  # word
                    nb_head_pred += len(pdep)
                    nb_rel_pred += len(pdep)
                    nb_head_gold += len(gdep)
                    nb_rel_gold += len(gdep)
                    for gd in gdep:  # (head_id, rel_id)
                        for pd in pdep:
                            if pd[0] == gd[0]:
                                nb_head_correct += 1
                                if pd == gd:
                                    nb_rel_correct += 1
                                break

    up, ur, uf = self.calc_prf(nb_head_correct, nb_head_pred, nb_head_gold)
    lp, lr, lf = self.calc_prf(nb_rel_correct, nb_rel_pred, nb_rel_gold)
    return dict(up=up, ur=ur, uf=uf, lp=lp, lr=lr, lf=lf)

def train_eval(self):
    train_loader = DataLoader(self.train_set, batch_size=self.args.batch_size, shuffle=True)
    self.args.max_step = self.args.epoch * (len(train_loader) // self.args.update_step)
    print('max step:', self.args.max_step)
    optimizer = Optimizer(filter(lambda p: p.requires_grad, self.model.parameters()), self.args)
    best_dev_metric, best_test_metric = dict(), dict()
    patient = 0
    for ep in range(1, 1 + self.args.epoch):
        train_loss = 0.
        self.model.train()
        t1 = time.time()
        train_head_acc, train_rel_acc, train_total_head = 0, 0, 0
        for i, batcher in enumerate(train_loader):
            batch = batch_variable(batcher, self.vocabs)
            batch.to_device(self.args.device)
            head_score, rel_score = self.model(batch.wd_ids, batch.ch_ids, batch.tag_ids)
            loss = self.calc_loss(head_score, rel_score, batch.head_ids, batch.rel_ids, batch.wd_ids.gt(0))
            loss_val = loss.data.item()
            train_loss += loss_val

            head_acc, rel_acc, total_head = self.calc_acc(head_score, rel_score, batch.head_ids, batch.rel_ids)
            train_head_acc += head_acc
            train_rel_acc += rel_acc
            train_total_head += total_head

            # accumulate gradients over update_step mini-batches before an optimizer step
            if self.args.update_step > 1:
                loss = loss / self.args.update_step
            loss.backward()
            if (i + 1) % self.args.update_step == 0 or (i == self.args.max_step - 1):
                nn_utils.clip_grad_norm_(filter(lambda p: p.requires_grad, self.model.parameters()),
                                         max_norm=self.args.grad_clip)
                optimizer.step()
                self.model.zero_grad()

            logger.info('[Epoch %d] Iter%d time cost: %.2fs, lr: %.6f, train loss: %.3f, head acc: %.3f, rel acc: %.3f' % (
                ep, i + 1, (time.time() - t1), optimizer.get_lr(), loss_val,
                train_head_acc / train_total_head, train_rel_acc / train_total_head))

        dev_metric = self.evaluate('dev')
        if dev_metric['uf'] > best_dev_metric.get('uf', 0):
            best_dev_metric = dev_metric
            test_metric = self.evaluate('test')
            if test_metric['uf'] > best_test_metric.get('uf', 0):
                # check_point = {'model': self.model.state_dict(), 'settings': args}
                # torch.save(check_point, self.args.model_chkp)
                best_test_metric = test_metric
            patient = 0
        else:
            patient += 1

        logger.info('[Epoch %d] train loss: %.4f, lr: %f, patient: %d, dev_metric: %s, test_metric: %s' % (
            ep, train_loss, optimizer.get_lr(), patient, best_dev_metric, best_test_metric))

        # if patient == (self.args.patient // 2 + 1):  # if dev performance does not improve for several epochs, decay the lr
        #     optimizer.lr_decay(0.95)

        if patient >= self.args.patient:  # early stopping
            break

    logger.info('Final Metric: %s' % best_test_metric)

train_set = get_training_set(upscale_factor, input_channels, target_channels,
                             training_path, patch_size=patch_size,
                             early_upsampling=early_upsampling)
test_set = get_test_set(upscale_factor, input_channels, target_channels,
                        test_path, patch_size=patch_size,
                        early_upsampling=early_upsampling)
training_data_loader = DataLoader(dataset=train_set, num_workers=threads,
                                  batch_size=batchSize, shuffle=True)
testing_data_loader = DataLoader(dataset=test_set, num_workers=threads,
                                 batch_size=testBatchSize, shuffle=False)

if args.verbose:
    print("{} training images / {} testing images".format(len(train_set), len(test_set)))
print("===> dataset loaded !")

print('===> Building model')
model = EDSR(upscale_factor, input_channels, target_channels,