def train(args=None, param_path=None, **kw):
    if args is None:
        args = kw.get('args')
    if param_path is None:
        if args.param_path is not None:
            param_path = args.param_path
        else:
            param_path = './params/'
    # sys.path.append(param_path)
    model_dict, optimizer_dict, trainer_dict, data_loader_dict = get_param_dicts(args)

    trainer = build_trainer(trainer_dict)
    data_loader = build_data_loader(data_loader_dict)
    trainer.bind_data_loader(data_loader)

    # model can be RSLP, RMLP, RCNN ...
    model = build_model(model_dict)

    # optimizer can be a BP, TP or CHL optimizer.
    optimizer = build_optimizer(optimizer_dict)
    optimizer.bind_model(model)
    optimizer.bind_trainer(trainer)

    trainer.bind_model(model)
    trainer.bind_optimizer(optimizer)
    trainer.train()

    # the model needs some data from data_loader to get response properties.
    model.analyze(data_loader=data_loader)
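# --- Example (sketch): a minimal CLI entry point for train(). The --param_path
# --- flag mirrors the args.param_path attribute used above; the argparse wiring
# --- itself is an assumption for illustration, not part of the original script.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--param_path', type=str, default=None,
                        help='directory holding the model/optimizer/trainer/data-loader params')
    cli_args = parser.parse_args()
    train(args=cli_args)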
def main(args):
    this_dir = osp.join(osp.dirname(__file__), '.')
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    data_loader = utl.build_data_loader(args, 'extract')

    model = build_model(args).to(device)
    model.load_state_dict(torch.load(args.checkpoint))
    model.train(False)

    with torch.set_grad_enabled(False):
        for batch_idx, (data, air_target, bed_target, save_path) in enumerate(data_loader):
            print('{:3.3f}%'.format(100.0 * batch_idx / len(data_loader)))
            batch_size = data.shape[0]
            data = data.to(device)

            air_feature, bed_feature = model.features(data)
            air_feature = air_feature.to('cpu').numpy()
            bed_feature = bed_feature.to('cpu').numpy()

            for bs in range(batch_size):
                if not osp.isdir(osp.dirname(save_path[bs])):
                    os.makedirs(osp.dirname(save_path[bs]))
                np.save(save_path[bs],
                        np.concatenate((air_feature[bs], bed_feature[bs]), axis=0))
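# --- Example (sketch): reading one of the saved feature files back. Each .npy
# --- written above holds a single sample's air and bed features concatenated
# --- along axis 0. `feature_path` is a placeholder and the split point depends
# --- on the per-branch feature size, which is not specified here.
def load_extracted_features(feature_path, air_dim):
    combined = np.load(feature_path)
    return combined[:air_dim], combined[air_dim:]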
def main(args):
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    utl.set_seed(int(args.seed))

    model = VideoModel(args.hidden_size).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)

    for epoch in range(args.epochs):
        data_loaders = {
            phase: utl.build_data_loader(args, phase)
            for phase in args.phases
        }

        for phase in args.phases:
            training = phase == 'train'
            if training:
                model.train(True)
            elif not training and args.debug:
                model.train(False)
            else:
                continue

            with torch.set_grad_enabled(training):
                avg_loss = 0
                for batch_idx, (camera_inputs, motion_inputs, enc_target, dec_target) \
                        in enumerate(data_loaders[phase], start=1):
                    camera_inputs = camera_inputs.to(device)
                    enc_target = enc_target.to(device)

                    if training:
                        optimizer.zero_grad()

                    scores = model(camera_inputs)

                    # Sum the cross-entropy losses over all timesteps.
                    loss = criterion(scores[:, 0], enc_target[:, 0].max(dim=1)[1])
                    for step in range(1, camera_inputs.shape[1]):
                        loss += criterion(scores[:, step],
                                          enc_target[:, step].max(dim=1)[1])

                    if training:
                        loss.backward()
                        optimizer.step()

                    avg_loss += loss.item()
                    print('{:5s} Epoch:{} Iteration:{} Loss:{:.3f}'.format(
                        phase, epoch + 1, batch_idx, loss.item()))

                print('-- {:5s} Epoch:{} avg_loss:{:.3f}'.format(
                    phase, epoch + 1, avg_loss / batch_idx))
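# --- Equivalent formulation (sketch): the per-timestep loop above computes
# --- sum_t mean_b CE(scores[:, t], argmax(enc_target[:, t])). The same value
# --- can be obtained without the Python loop by flattening batch and time and
# --- reducing manually. `scores` is assumed to be (batch, steps, num_classes)
# --- and `enc_target` (batch, steps, num_classes), as implied by the indexing above.
def summed_timestep_loss(scores, enc_target):
    batch, steps, num_classes = scores.shape
    per_elem = nn.functional.cross_entropy(
        scores.reshape(batch * steps, num_classes),
        enc_target.argmax(dim=2).reshape(batch * steps),
        reduction='none')  # one loss per (batch, step) pair
    # mean over the batch for each step, then sum over steps
    return per_elem.reshape(batch, steps).mean(dim=0).sum()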
def impute_test_set(self, data_set, batch_size=None):
    batch_size = batch_size or self.eval_batch_size
    data_iter = build_data_loader(data_set,
                                  self.device,
                                  batch_size,
                                  False,
                                  testing=True)
    self.model.eval()
    out_dir = self.out_path / 'imputations_test_set'
    out_dir.mkdir()

    imp_dfs = []
    pbar = tqdm.tqdm(desc='Generating imputation', total=len(data_iter))
    for idx, data in enumerate(data_iter):
        missing_masks = 1 - data['masks']
        ret = self.model(data)
        imputation = ret['imputations']
        pids = data['pids']

        # One row per missing entry: (patient id, time index, variable index).
        imp_df = pd.DataFrame(missing_masks.nonzero().data.cpu().numpy(),
                              columns=['pid', 'tid', 'colid'])
        imp_df['pid'] = imp_df['pid'].map(
            {i: pid for i, pid in enumerate(pids)})
        imp_df['analyte'] = imp_df['colid'].map(self.var_names_dict)
        imp_df['imputation'] = imputation[missing_masks == 1].data.cpu().numpy()
        imp_dfs.append(imp_df)

        # Save one CSV of imputed time series per patient.
        for p in range(len(pids)):
            seq_len = data['lengths'][p]
            time_stamps = data['time_stamps'][p, :seq_len].unsqueeze(1)
            imp = imputation[p, :seq_len, :]
            df = pd.DataFrame(torch.cat([time_stamps, imp],
                                        dim=1).data.cpu().numpy(),
                              columns=['CHARTTIME'] + self.var_names)
            df['CHARTTIME'] = df['CHARTTIME'].apply(int)
            df.to_csv(out_dir / f'{pids[p]}.csv', index=False)
        pbar.update()

    pbar.close()
    print(f'Done, results saved in:\n {out_dir.resolve()}')
    return imp_dfs
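# --- Illustration (sketch): how the (pid, tid, colid) columns above arise.
# --- `missing_masks` is (patients, timesteps, variables); calling .nonzero() on a
# --- 3-D tensor returns one row of indices per non-zero entry, which is exactly
# --- what the DataFrame columns ['pid', 'tid', 'colid'] capture. Toy values only.
def _nonzero_index_demo():
    masks = torch.zeros(2, 3, 4)
    masks[0, 1, 2] = 1      # patient 0, timestep 1, variable 2 is missing
    masks[1, 0, 3] = 1      # patient 1, timestep 0, variable 3 is missing
    print(masks.nonzero())  # tensor([[0, 1, 2], [1, 0, 3]])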
def main(args):
    this_dir = osp.join(osp.dirname(__file__), '.')
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    data_loaders = {
        phase: utl.build_data_loader(args, phase)
        for phase in args.phases
    }

    model = build_model(args).apply(utl.weights_init).to(device)
    air_criterion = nn.L1Loss().to(device)
    bed_criterion = nn.L1Loss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    for epoch in range(1, args.epochs + 1):
        # Learning rate schedule: decay by 0.4 at epoch 5 and at every 10th epoch.
        if epoch == 5 or epoch % 10 == 0:
            args.lr = args.lr * 0.4
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        air_errors = {phase: 0.0 for phase in args.phases}
        bed_errors = {phase: 0.0 for phase in args.phases}

        start = time.time()
        for phase in args.phases:
            training = phase == 'train'
            if training:
                model.train(True)
            else:
                if epoch % args.test_interval == 0:
                    model.train(False)
                else:
                    continue

            with torch.set_grad_enabled(training):
                for batch_idx, (data, init, air_target, bed_target) in enumerate(data_loaders[phase]):
                    batch_size = data.shape[0]
                    data = data.to(device)
                    init = init.to(device)
                    air_target = air_target.to(device)
                    bed_target = bed_target.to(device)

                    air_output, bed_output = model(data, init)
                    air_loss = air_criterion(air_output, air_target)
                    bed_loss = bed_criterion(bed_output, bed_target)
                    air_errors[phase] += air_loss.item() * batch_size
                    bed_errors[phase] += bed_loss.item() * batch_size
                    if args.debug:
                        print(air_loss.item(), bed_loss.item())

                    if training:
                        optimizer.zero_grad()
                        loss = air_loss + bed_loss
                        loss.backward()
                        optimizer.step()
        end = time.time()

        if epoch % args.test_interval == 0:
            snapshot_path = osp.join(this_dir, 'snapshots')
            if not os.path.isdir(snapshot_path):
                os.makedirs(snapshot_path)
            # The 412 factor rescales the averaged L1 loss for reporting.
            snapshot_name = 'epoch-{}-air-{}-bed-{}.pth'.format(
                epoch,
                float('{:.2f}'.format(air_errors['test'] /
                                      len(data_loaders['test'].dataset) * 412)),
                float('{:.2f}'.format(bed_errors['test'] /
                                      len(data_loaders['test'].dataset) * 412)),
            )
            torch.save(model.state_dict(),
                       os.path.join(snapshot_path, snapshot_name))

            print('Epoch {:2} | '
                  'train loss (air): {:4.2f} (bed): {:4.2f} | '
                  'test loss (air): {:4.2f} (bed): {:4.2f} | '
                  'running time: {:.2f} sec'.format(
                      epoch,
                      air_errors['train'] / len(data_loaders['train'].dataset) * 412,
                      bed_errors['train'] / len(data_loaders['train'].dataset) * 412,
                      air_errors['test'] / len(data_loaders['test'].dataset) * 412,
                      bed_errors['test'] / len(data_loaders['test'].dataset) * 412,
                      end - start,
                  ))
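# --- Alternative (sketch): the manual decay above (lr *= 0.4 at epoch 5 and at
# --- every epoch divisible by 10) can also be expressed with torch's
# --- MultiplicativeLR scheduler. Here scheduler.step() is assumed to be called
# --- once at the top of each 1-based epoch, matching the placement of the inline
# --- schedule; this helper is illustrative and not part of the original script.
def build_lr_scheduler(optimizer):
    factor = lambda epoch: 0.4 if (epoch == 5 or (epoch > 0 and epoch % 10 == 0)) else 1.0
    return optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=factor)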
def main(args):
    this_dir = osp.join(osp.dirname(__file__), '.')
    save_dir = osp.join(this_dir, 'checkpoints')
    if not osp.isdir(save_dir):
        os.makedirs(save_dir)
    command = 'python ' + ' '.join(sys.argv)
    logger = utl.setup_logger(osp.join(this_dir, 'log.txt'), command=command)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    utl.set_seed(int(args.seed))

    model = build_model(args)
    if osp.isfile(args.checkpoint):
        checkpoint = torch.load(args.checkpoint, map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
    else:
        model.apply(utl.weights_init)
    if args.distributed:
        model = nn.DataParallel(model)
    model = model.to(device)

    criterion = utl.MultiCrossEntropyLoss(ignore_index=21).to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    if osp.isfile(args.checkpoint):
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr
        args.start_epoch += checkpoint['epoch']
    softmax = nn.Softmax(dim=1).to(device)

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        if epoch == 21:
            args.lr = args.lr * 0.1
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        data_loaders = {
            phase: utl.build_data_loader(args, phase)
            for phase in args.phases
        }

        enc_losses = {phase: 0.0 for phase in args.phases}
        enc_score_metrics = []
        enc_target_metrics = []
        enc_mAP = 0.0
        dec_losses = {phase: 0.0 for phase in args.phases}
        dec_score_metrics = []
        dec_target_metrics = []
        dec_mAP = 0.0

        start = time.time()
        for phase in args.phases:
            training = phase == 'train'
            if training:
                model.train(True)
            elif not training and args.debug:
                model.train(False)
            else:
                continue

            with torch.set_grad_enabled(training):
                for batch_idx, (camera_inputs, motion_inputs, enc_target, dec_target) \
                        in enumerate(data_loaders[phase], start=1):
                    batch_size = camera_inputs.shape[0]
                    camera_inputs = camera_inputs.to(device)
                    motion_inputs = motion_inputs.to(device)
                    enc_target = enc_target.to(device).view(-1, args.num_classes)
                    dec_target = dec_target.to(device).view(-1, args.num_classes)

                    enc_score, dec_score = model(camera_inputs, motion_inputs)
                    enc_loss = criterion(enc_score, enc_target)
                    dec_loss = criterion(dec_score, dec_target)
                    enc_losses[phase] += enc_loss.item() * batch_size
                    dec_losses[phase] += dec_loss.item() * batch_size
                    if args.verbose:
                        print('Epoch: {:2} | iteration: {:3} | '
                              'enc_loss: {:.5f} dec_loss: {:.5f}'.format(
                                  epoch, batch_idx, enc_loss.item(), dec_loss.item()))

                    if training:
                        optimizer.zero_grad()
                        loss = enc_loss + dec_loss
                        loss.backward()
                        optimizer.step()
                    else:
                        # Prepare metrics for encoder
                        enc_score = softmax(enc_score).cpu().numpy()
                        enc_target = enc_target.cpu().numpy()
                        enc_score_metrics.extend(enc_score)
                        enc_target_metrics.extend(enc_target)
                        # Prepare metrics for decoder
                        dec_score = softmax(dec_score).cpu().numpy()
                        dec_target = dec_target.cpu().numpy()
                        dec_score_metrics.extend(dec_score)
                        dec_target_metrics.extend(dec_target)
        end = time.time()

        if args.debug:
            result_file = 'inputs-{}-epoch-{}.json'.format(args.inputs, epoch)
            # Compute result for encoder
            enc_mAP = utl.compute_result_multilabel(
                args.class_index,
                enc_score_metrics,
                enc_target_metrics,
                save_dir,
                result_file,
                ignore_class=[0, 21],
                save=True,
            )
            # Compute result for decoder
            dec_mAP = utl.compute_result_multilabel(
                args.class_index,
                dec_score_metrics,
                dec_target_metrics,
                save_dir,
                result_file,
                ignore_class=[0, 21],
                save=False,
            )

        # Output result
        logger.output(epoch,
                      enc_losses,
                      dec_losses,
                      len(data_loaders['train'].dataset),
                      len(data_loaders['test'].dataset),
                      enc_mAP,
                      dec_mAP,
                      end - start,
                      debug=args.debug)

        # Save model
        checkpoint_file = 'inputs-{}-epoch-{}.pth'.format(args.inputs, epoch)
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict':
                    model.module.state_dict() if args.distributed else model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, osp.join(save_dir, checkpoint_file))
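# --- Example (sketch): consuming one of the checkpoints written above. The keys
# --- ('epoch', 'model_state_dict', 'optimizer_state_dict') match the dict saved
# --- by main(); the helper name and eval-only usage are illustrative assumptions.
def load_for_eval(args, checkpoint_path, device):
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model = build_model(args)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.train(False)
    return model, checkpoint['epoch']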
def k_fold():
    images, masks = load_train_data(TRAIN_IMAGES_PATH, TRAIN_MASKS_PATH)
    test_file_paths, test_images = load_test_data(TEST_IMAGES_PATH,
                                                  load_images=True,
                                                  to256=False)

    train_transformer = transforms.Compose([
        CropAugmenter(),
        AffineAugmenter(),
        MasksAdder(),
        ToTensor(),
        Normalize(),
        ClassAdder()
    ])
    eval_transformer = transforms.Compose(
        [MasksAdder(), ToTensor(), Normalize(), ClassAdder()])
    predict_transformer = transforms.Compose(
        [ToTensor(predict=True), Normalize(predict=True)])

    test_images_loader = build_data_loader(test_images,
                                           None,
                                           predict_transformer,
                                           batch_size=BATCH_SIZE,
                                           shuffle=False,
                                           num_workers=4,
                                           predict=True)

    k_fold = KFold(n_splits=FOLDS, random_state=RANDOM_SEED, shuffle=True)
    test_masks_folds = []

    config = AttrDict({
        'cuda_index': CUDA_ID,
        'momentum': MOMENTUM,
        'lr': LR,
        'tune_lr': TUNE_LR,
        'min_lr': MIN_LR,
        'bce_epochs': BCE_EPOCHS,
        'intermediate_epochs': INTERMEDIATE_EPOCHS,
        'cycle_length': CYCLE_LENGTH,
        'logs_dir': LOGS_DIR,
        'masks_weight': MASKS_WEIGHT,
        'class_weight': CLASS_WEIGHT,
        'val_metric_criterion': 'comp_metric'
    })

    for index, (train_index, valid_index) in list(enumerate(k_fold.split(images))):
        print('fold_{}\n'.format(index))
        x_train_fold, x_valid = images[train_index], images[valid_index]
        y_train_fold, y_valid = masks[train_index], masks[valid_index]

        train_data_loader = build_data_loader(x_train_fold,
                                              y_train_fold,
                                              train_transformer,
                                              batch_size=BATCH_SIZE,
                                              shuffle=True,
                                              num_workers=4,
                                              predict=False)
        val_data_loader = build_data_loader(x_valid,
                                            y_valid,
                                            eval_transformer,
                                            batch_size=BATCH_SIZE,
                                            shuffle=False,
                                            num_workers=4,
                                            predict=False)
        test_data_loader = build_data_loader(x_valid,
                                             y_valid,
                                             eval_transformer,
                                             batch_size=BATCH_SIZE,
                                             shuffle=False,
                                             num_workers=4,
                                             predict=False)
        data_loaders = AttrDict({
            'train': train_data_loader,
            'val': val_data_loader,
            'test': test_data_loader
        })

        # Loss-weight schedule: hold the Lovász share at 0.1 for the first
        # BCE_EPOCHS epochs, then ramp it linearly from 0.1 to 0.9 over
        # INTERMEDIATE_EPOCHS; the BCE share is the complement.
        lovasz_ratios = np.full(BCE_EPOCHS, 0.1)
        lovasz_ratios = np.hstack(
            (lovasz_ratios, np.linspace(0.1, 0.9, INTERMEDIATE_EPOCHS)))
        bce_ratios = 1.0 - lovasz_ratios
        loss_weights = [
            (bce_ratio, lovasz_ratio)
            for bce_ratio, lovasz_ratio in zip(bce_ratios, lovasz_ratios)
        ]

        loss = LossAggregator((nn.BCEWithLogitsLoss(), LovaszLoss()),
                              weights=[0.9, 0.1])
        metrics = {
            'binary_accuracy': BinaryAccuracy,
            'dice_coefficient': DiceCoefficient,
            'comp_metric': CompMetric
        }

        segmentor = SawSeenNet(base_channels=64,
                               pretrained=True,
                               frozen=False).cuda(config.cuda_index)
        trainer = Trainer(config=config,
                          model=segmentor,
                          loss=loss,
                          loss_weights=loss_weights,
                          metrics=metrics,
                          data_loaders=data_loaders)
        segmentor = trainer.train(num_epochs=NUM_EPOCHS,
                                  model_pattern=MODEL_FILE_PATH + '_{}_fold.pth'.format(index))

        test_masks = predict(config, segmentor, test_images_loader, thresholding=False)
        test_masks = trim_masks(test_masks, height=IMG_SIZE_ORIGIN, width=IMG_SIZE_ORIGIN)
        test_masks_folds.append(test_masks)
        np.save(FOLDS_FILE_PATH.format(index), test_masks)

    # Average the per-fold test predictions and threshold to get the final masks.
    result_masks = np.zeros_like(test_masks_folds[0])
    for test_masks in test_masks_folds:
        result_masks += test_masks
    result_masks = result_masks.astype(dtype=np.float32)
    result_masks /= FOLDS
    result_masks = result_masks > THRESHOLD

    return test_file_paths, result_masks
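# --- Worked example (sketch): the loss-weight schedule built inside k_fold(),
# --- shown for small illustrative values bce_epochs=2 and intermediate_epochs=3
# --- (the real BCE_EPOCHS / INTERMEDIATE_EPOCHS constants are project settings).
def _loss_weight_demo(bce_epochs=2, intermediate_epochs=3):
    lovasz = np.hstack((np.full(bce_epochs, 0.1),
                        np.linspace(0.1, 0.9, intermediate_epochs)))
    bce = 1.0 - lovasz
    # -> [(0.9, 0.1), (0.9, 0.1), (0.9, 0.1), (0.5, 0.5), (0.1, 0.9)]
    # i.e. training starts dominated by BCEWithLogitsLoss and shifts towards
    # LovaszLoss as the epochs progress.
    return list(zip(bce, lovasz))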
def fit(self,
        epochs=300,
        batch_size=64,
        eval_batch_size=64,
        eval_epoch=1,
        record_imp_epoch=50):
    # Construct the optimizer: the context RNN gets a stronger weight decay
    # than the rest of the imputation network.
    context_rnn_params = {
        'params': self.model.context_rnn.parameters(),
        'lr': 1e-3,
        'weight_decay': 5e-3
    }
    imp_rnn_params = {
        'params': [
            p[1] for p in self.model.named_parameters()
            if p[0].split('.')[0] != 'context_rnn'
        ],
        'lr': 1e-3,
        'weight_decay': 5e-5
    }
    optimizer = optim.Adam([context_rnn_params, imp_rnn_params])

    train_iter = build_data_loader(self.train_set, self.device, batch_size, shuffle=True)
    valid_iter = build_data_loader(self.valid_set, self.device, eval_batch_size, shuffle=True)
    self.eval_batch_size = eval_batch_size

    imp_dfs_train = None
    imp_dfs_valid = None
    for epoch in range(epochs):
        self.model.train()
        pbar_desc = f'Epoch {epoch + 1}: '
        pbar = tqdm.tqdm(total=len(train_iter), desc=pbar_desc)
        total_loss = AverageMeter()
        total_loss_eval = AverageMeter()
        verbose_loss = [AverageMeter() for _ in range(6)]

        for idx, data in enumerate(train_iter):
            optimizer.zero_grad()
            ret = self.model(data)
            ret['loss'].backward()
            # Clip gradients after backward() and before the optimizer step.
            clip_grad_norm_(self.model.parameters(), 1)
            optimizer.step()

            total_loss.update(ret['loss'].item(), ret['loss_count'].item())
            total_loss_eval.update(ret['loss_eval'].item(),
                                   ret['loss_eval_count'].item())
            for i, (k, v, c) in enumerate(ret['verbose_loss']):
                verbose_loss[i].update(v.item(), c)
            pbar.set_description(pbar_desc +
                                 f'Training loss={total_loss.avg:.3e}')
            pbar.update()

        pbar_desc = f'Epoch {epoch + 1} done, Training loss={total_loss.avg:.3e}'
        pbar.set_description(pbar_desc)
        pbar.close()

        if (epoch + 1) % eval_epoch == 0:
            self.evaluate(valid_iter)

        if record_imp_epoch and (epoch + 1) % record_imp_epoch == 0:
            if imp_dfs_train is None:
                imp_dfs_train = self.retrieve_imputation(train_iter, epoch + 1)
            else:
                imp_dfs_train = pd.concat([
                    imp_dfs_train,
                    self.retrieve_imputation(train_iter, epoch + 1)
                ], axis=0)
            if imp_dfs_valid is None:
                imp_dfs_valid = self.retrieve_imputation(valid_iter, epoch + 1)
            else:
                imp_dfs_valid = pd.concat([
                    imp_dfs_valid,
                    self.retrieve_imputation(valid_iter, epoch + 1)
                ], axis=0)
            imp_dfs_train.to_excel(self.out_path / 'imp_train.xlsx',
                                   merge_cells=False)
            imp_dfs_valid.to_excel(self.out_path / 'imp_valid.xlsx',
                                   merge_cells=False)

    print('Training is done, performing final evaluation on validation set...')
    loss_valid, mae, mre, nrmsd = self.evaluate(valid_iter)
    with open(self.out_path / 'final_eval.csv', 'w') as txtfile:
        txtfile.write('Metrics, ' + (', '.join(self.var_names)) + '\n')
        txtfile.write('MAE, ' + (', '.join([f'{x:.3f}' for x in mae])) + '\n')
        txtfile.write('MRE, ' + (', '.join([f'{x:.3f}' for x in mre])) + '\n')
        txtfile.write('nRMSD, ' + (', '.join([f'{x:.3f}' for x in nrmsd])) + '\n')
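# --- Sketch of the AverageMeter helper assumed in fit(): it is used with
# --- update(value, count) and an .avg attribute. This mirrors the common
# --- running-average pattern; the project's own implementation may differ.
class AverageMeter:
    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, value, count=1):
        # Accumulate `value` weighted by `count` and expose the running average.
        self.sum += value * count
        self.count += count
        self.avg = self.sum / max(self.count, 1)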