Example #1
0
    def __init__(self, opt, cloth_dir=None, body_dir=None):
        """Initialize the cloth/body dataset.

        Args:
            opt: parsed options object; must provide ``dataroot``,
                ``cloth_representation`` and ``shuffle_data``.
            cloth_dir: optional explicit path to the cloth directory;
                defaults to ``<opt.dataroot>/cloth``.
            body_dir: optional explicit path to the body directory;
                defaults to ``<opt.dataroot>/body``.
        """
        super().__init__(opt)

        # Resolve the cloth directory and collect its files.
        if cloth_dir:
            self.cloth_dir = cloth_dir
        else:
            self.cloth_dir = os.path.join(opt.dataroot, "cloth")
        print("cloth dir", self.cloth_dir)
        # Label-style cloth representations are stored as .npz archives.
        if self.opt.cloth_representation == "labels":
            extensions = [".npz"]
        else:
            extensions = None
        print("Extensions:", extensions)
        self.cloth_files = find_valid_files(self.cloth_dir, extensions)
        if not opt.shuffle_data:
            self.cloth_files.sort()

        # Resolve the body directory; the file listing itself is only
        # needed at inference time.
        if body_dir:
            self.body_dir = body_dir
        else:
            self.body_dir = os.path.join(opt.dataroot, "body")
        if not self.is_train:  # only load these during inference
            self.body_files = find_valid_files(self.body_dir)
            if not opt.shuffle_data:
                self.body_files.sort()
        print("body dir", self.body_dir)
        # Normalization stats live one level above the body dir; also
        # exposed on opt for consumers elsewhere.
        self.body_norm_stats = get_norm_stats(os.path.dirname(self.body_dir), "body")
        opt.body_norm_stats = self.body_norm_stats
        self._normalize_body = transforms.Normalize(*self.body_norm_stats)

        self.cloth_transform = get_transforms(opt)
Example #2
0
 def __init__(self, weights):
     """Build the face-recognition wrapper around FaceNetModel.

     Args:
         weights: optional path to a checkpoint; when given, its
             ``state_dict`` is loaded into the model (CPU-mapped).
     """
     super(Recognition, self).__init__()
     torch.backends.cudnn.benchmark = True
     self.weights = weights
     self.model = FaceNetModel(embedding_size=128, num_classes=10000)
     self.device, device_ids = self._prepare_device([0])
     # self.device = torch.device('cpu')
     self.model = self.model.to(self.device)
     # Wrap in DataParallel only when more than one device is available.
     if len(device_ids) > 1:
         self.model = torch.nn.DataParallel(self.model, device_ids=device_ids)
     self.transforms = get_transforms(phase='valid', width=224, height=224)
     if self.weights is not None:
         print('Load Checkpoint')
         # map_location keeps the load on CPU regardless of where the
         # checkpoint was saved.
         state = torch.load(weights, map_location=lambda storage, loc: storage)
         self.model.load_state_dict(state['state_dict'])
     self.model.eval()
     # init: pre-allocate a 1x3x224x224 input tensor on the target device.
     self.image = torch.FloatTensor(1, 3, 224, 224).to(self.device)
Example #3
0
def main():
    """Train one fold of the RANZCR catheter classifier.

    Reads configuration from the module-level ``args``, ``DP`` and
    ``device`` globals.  Builds the backbone selected by
    ``args.model_name``, trains for up to ``args.n_epochs`` epochs with
    warmup + cosine LR scheduling, checkpoints the best-AUC, best-loss
    and final weights, and pickles the per-epoch metric log to
    ``{args.log_dir}/logs.pickle``.
    """
    seed_everything(args.seed)
    logger = Logger()
    logger.open(
        f'{args.log_dir}/log.train_exp_{args.exp}_fold_{args.fold_id}.txt',
        mode='a')

    data_dir = f'{args.root_dir}/input/ranzcr-clip-catheter-line-classification/train'

    df_train = pd.read_csv(
        f'{args.root_dir}/input/how-to-properly-split-folds/train_folds.csv')
    # Aggregate the per-catheter labels into two extra binary targets.
    abnormal_cols = [c for c in df_train.columns if 'Abnormal' in c]
    normal_cols = [c for c in df_train.columns if 'Normal' in c]
    df_train['abnormal'] = (df_train[abnormal_cols].sum(1) > 0).astype(int)
    df_train['normal'] = (df_train[normal_cols].sum(1) > 0).astype(int)
    df_train['file_path'] = df_train.StudyInstanceUID.apply(
        lambda x: os.path.join(data_dir, f'{x}.jpg'))

    if args.debug:
        # 10% subsample for quick debugging runs.
        df_train = df_train.sample(frac=0.1)
    target_cols = [
        'ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal', 'NGT - Abnormal',
        'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal',
        'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal',
        'Swan Ganz Catheter Present', 'abnormal', 'normal'
    ]

    # Pick the backbone family from the model name.
    if 'efficientnet' in args.model_name:
        model = RANZCREffiNet(args.model_name,
                              out_dim=len(target_cols),
                              pretrained=True)
    elif 'vit' in args.model_name:
        model = RANZCRViT(args.model_name,
                          out_dim=len(target_cols),
                          pretrained=True)
    else:
        model = RANZCRResNet200D(args.model_name,
                                 out_dim=len(target_cols),
                                 pretrained=True)

    if DP:
        model = apex.parallel.convert_syncbn_model(model)
    model = model.to(device)

    criterion = nn.BCEWithLogitsLoss()
    # Start at a reduced LR; GradualWarmupSchedulerV2 ramps it back up
    # over the first args.warmup_epo epochs, then hands over to cosine.
    optimizer = optim.Adam(model.parameters(),
                           lr=args.init_lr / args.warmup_factor)
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.n_epochs, eta_min=1e-7)
    scheduler_warmup = GradualWarmupSchedulerV2(
        optimizer,
        multiplier=10,
        total_epoch=args.warmup_epo,
        after_scheduler=scheduler_cosine)

    if args.use_amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    if DP:
        model = nn.DataParallel(model)

    df_train_this = df_train[df_train['fold'] != args.fold_id]
    df_valid_this = df_train[df_train['fold'] == args.fold_id]

    transforms_train, transforms_valid = get_transforms(args.image_size)

    dataset_train = RANZERDataset(df_train_this,
                                  'train',
                                  target_cols,
                                  transform=transforms_train)
    dataset_valid = RANZERDataset(df_valid_this,
                                  'valid',
                                  target_cols,
                                  transform=transforms_valid)

    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers)
    valid_loader = torch.utils.data.DataLoader(
        dataset_valid,
        batch_size=args.valid_batch_size,
        shuffle=False,
        num_workers=args.num_workers)

    log = {}
    roc_auc_max = 0.
    loss_min = 99999
    not_improving = 0

    logger.write(f"{'#'*20} start training fold : {args.fold_id}\n")
    for epoch in range(1, args.n_epochs + 1):
        scheduler_warmup.step(epoch - 1)
        loss_train = train_func(train_loader, model, optimizer, criterion)
        loss_valid, roc_auc = valid_func(valid_loader, model, optimizer,
                                         criterion, target_cols)

        log['loss_train'] = log.get('loss_train', []) + [loss_train]
        log['loss_valid'] = log.get('loss_valid', []) + [loss_valid]
        log['lr'] = log.get('lr', []) + [optimizer.param_groups[0]["lr"]]
        log['roc_auc'] = log.get('roc_auc', []) + [roc_auc]

        content = time.ctime() + ' ' + f'Fold {args.fold_id}, Epoch {epoch}, ' \
                                       f'lr: {optimizer.param_groups[0]["lr"]:.7f}, ' \
                                       f'loss_train: {loss_train:.5f}, ' \
                                       f'loss_valid: {loss_valid:.5f}, ' \
                                       f'roc_auc: {roc_auc:.6f}.\n'
        logger.write(content)
        not_improving += 1

        if roc_auc > roc_auc_max:
            logger.write(
                f'roc_auc_max ({roc_auc_max:.6f} --> {roc_auc:.6f}). Saving model ...\n'
            )
            # BUGFIX: name the checkpoint after the NEW best AUC (roc_auc);
            # previously it embedded the stale roc_auc_max.
            torch.save(
                model.state_dict(),
                f'{args.model_dir}/{args.model_name}_fold{args.fold_id}_best_AUC_{roc_auc:.4f}.pth'
            )
            roc_auc_max = roc_auc
            not_improving = 0

        if loss_valid < loss_min:
            loss_min = loss_valid
            torch.save(
                model.state_dict(),
                f'{args.model_dir}/{args.model_name}_fold{args.fold_id}_best_loss_{loss_min:.4f}.pth'
            )

        if not_improving == args.early_stop:
            logger.write('Early Stopping...')
            break

    torch.save(
        model.state_dict(),
        f'{args.model_dir}/{args.model_name}_fold{args.fold_id}_final.pth')
    with open(f'{args.log_dir}/logs.pickle', 'wb') as handle:
        pickle.dump(log, handle, protocol=pickle.HIGHEST_PROTOCOL)
    # NOTE: a duplicated second copy of the optimizer/scheduler/dataloader
    # setup used to follow here; it executed after training had finished
    # and the log was written, so it had no effect and was removed as
    # dead code.
Example #5
0
def main():
    """Fine-tune a pre-trained RANZCR fold model with pseudo-labelled data.

    Reads configuration from the module-level ``args``, ``DP`` and
    ``device`` globals.  Loads the stage-1 best-AUC checkpoint for
    ``args.fold_id``, replaces the head with an 11-way linear layer,
    concatenates the pseudo-labelled dataset onto the fold's training
    split and retrains, saving best-AUC, best-loss and final checkpoints
    plus a pickled per-epoch metric log.
    """
    seed_everything(args.seed)
    logger = Logger()

    # Per-fold checkpoints produced by the first training stage.
    model_paths = [f'{args.root_dir}/src_1/weights_exp1/resnet200d_fold0_best_AUC_0.9457.pth',
                   f'{args.root_dir}/src_1/resnet200d_fold1_best_AUC_0.9527.pth',
                   f'{args.root_dir}/src_1/resnet200d_fold2_best_AUC_0.9530.pth',
                   f'{args.root_dir}/src_1/resnet200d_fold3_best_AUC_0.9528.pth',
                   f'{args.root_dir}/src_1/resnet200d_fold4_best_AUC_0.9499.pth']

    logger.open(f'{args.log_dir}/log.train_exp_{args.exp}_fold_{args.fold_id}.txt', mode='a')

    data_dir = f'{args.root_dir}/input/ranzcr-clip-catheter-line-classification/train'

    df_train = pd.read_csv(f'{args.root_dir}/input/how-to-properly-split-folds/train_folds.csv')
    df_train['file_path'] = df_train.StudyInstanceUID.apply(lambda x: os.path.join(data_dir, f'{x}.jpg'))

    pseudo_df = pd.read_csv(f'{args.root_dir}/src_nih/pseudo_df.csv')

    if args.debug:
        # 10% subsample for quick debugging runs.
        df_train = df_train.sample(frac=0.1)
    target_cols = df_train.iloc[:, 1:12].columns.tolist()

    # Pick the backbone family from the model name.
    if 'efficientnet' in args.model_name:
        model = RANZCREffiNet(args.model_name, out_dim=len(target_cols), pretrained=True)
    elif 'vit' in args.model_name:
        model = RANZCRViT(args.model_name, out_dim=len(target_cols), pretrained=True)
    else:
        model = RANZCRResNet200D(args.model_name, out_dim=len(target_cols), pretrained=True)

    # Stage-1 checkpoints were saved from a DataParallel-wrapped model;
    # strip the 'module.' prefix where present so the keys match a bare
    # model (previously the first 7 chars were dropped unconditionally,
    # which corrupted keys saved without DataParallel).
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    state_dict = torch.load(model_paths[args.fold_id])
    for k, v in state_dict.items():
        name = k[7:] if k.startswith('module.') else k
        new_state_dict[name] = v
    model.load_state_dict(new_state_dict)
    # Replace the classification head for the 11 competition targets.
    model.fc = nn.Linear(model.fc.in_features, 11).to(device)

    if DP:
        model = apex.parallel.convert_syncbn_model(model)
    model = model.to(device)

    criterion = nn.BCEWithLogitsLoss()
    # Start at a reduced LR; GradualWarmupSchedulerV2 ramps it back up
    # over the first args.warmup_epo epochs, then hands over to cosine.
    optimizer = optim.Adam(model.parameters(), lr=args.init_lr/args.warmup_factor)
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.n_epochs, eta_min=1e-7)
    scheduler_warmup = GradualWarmupSchedulerV2(optimizer, multiplier=10,
                                                total_epoch=args.warmup_epo,
                                                after_scheduler=scheduler_cosine)

    if args.use_amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    if DP:
        model = nn.DataParallel(model)

    df_train_this = df_train[df_train['fold'] != args.fold_id]
    df_valid_this = df_train[df_train['fold'] == args.fold_id]

    transforms_train, transforms_valid = get_transforms(args.image_size)

    dataset_train = RANZERDataset(df_train_this, 'train', target_cols, transform=transforms_train)
    dataset_valid = RANZERDataset(df_valid_this, 'valid', target_cols, transform=transforms_valid)

    # Mix the pseudo-labelled samples into the training set.
    dataset_pseudo = RANZERDataset(pseudo_df, 'train', target_cols, transform=transforms_train)
    dataset_train = torch.utils.data.ConcatDataset([dataset_train, dataset_pseudo])

    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers)
    valid_loader = torch.utils.data.DataLoader(dataset_valid,
                                               batch_size=args.valid_batch_size,
                                               shuffle=False,
                                               num_workers=args.num_workers)

    log = {}
    roc_auc_max = 0.
    loss_min = 99999
    not_improving = 0

    logger.write(f"{'#'*20} start training fold : {args.fold_id}\n")
    for epoch in range(1, args.n_epochs + 1):
        scheduler_warmup.step(epoch - 1)
        loss_train = train_func(train_loader, model, optimizer, criterion)
        loss_valid, roc_auc = valid_func(valid_loader, model, optimizer, criterion, target_cols)

        log['loss_train'] = log.get('loss_train', []) + [loss_train]
        log['loss_valid'] = log.get('loss_valid', []) + [loss_valid]
        log['lr'] = log.get('lr', []) + [optimizer.param_groups[0]["lr"]]
        log['roc_auc'] = log.get('roc_auc', []) + [roc_auc]

        content = time.ctime() + ' ' + f'Fold {args.fold_id}, Epoch {epoch}, ' \
                                       f'lr: {optimizer.param_groups[0]["lr"]:.7f}, ' \
                                       f'loss_train: {loss_train:.5f}, ' \
                                       f'loss_valid: {loss_valid:.5f}, ' \
                                       f'roc_auc: {roc_auc:.6f}.\n'
        logger.write(content)
        not_improving += 1

        if roc_auc > roc_auc_max:
            logger.write(f'roc_auc_max ({roc_auc_max:.6f} --> {roc_auc:.6f}). Saving model ...\n')
            # BUGFIX: name the checkpoint after the NEW best AUC (roc_auc);
            # previously it embedded the stale roc_auc_max.
            torch.save(model.state_dict(), f'{args.model_dir}/{args.model_name}_fold{args.fold_id}_best_AUC_{roc_auc:.4f}.pth')
            roc_auc_max = roc_auc
            not_improving = 0

        if loss_valid < loss_min:
            loss_min = loss_valid
            torch.save(model.state_dict(), f'{args.model_dir}/{args.model_name}_fold{args.fold_id}_best_loss_{loss_min:.4f}.pth')

        if not_improving == args.early_stop:
            logger.write('Early Stopping...')
            break

    torch.save(model.state_dict(), f'{args.model_dir}/{args.model_name}_fold{args.fold_id}_final.pth')
    with open(f'{args.log_dir}/logs.pickle', 'wb') as handle:
        pickle.dump(log, handle, protocol=pickle.HIGHEST_PROTOCOL)