def create_data_loaders(fold, batch_size, workers):
    train_ids, val_ids = dataset.get_split(fold)
    labeled_size = len(train_ids)
    unlabeled_size = 18000
    sampler = dataset.TwoStreamBatchSampler(
        range(labeled_size),                                       # labeled ids
        list(range(labeled_size, labeled_size + unlabeled_size)),  # unlabeled ids
        batch_size,          # total batch size (labeled + unlabeled)
        LABELED_BATCH_SIZE)  # labeled batch size (TODO: was .5)
    train_loader = dataset.DataLoader(
        dataset=dataset.MeanTeacherTGSDataset(
            train_ids, transform=dataset.train_transform(), mode='train'),
        # shuffling is handled by the batch sampler, so shuffle is not passed
        num_workers=workers,
        batch_sampler=sampler,
        pin_memory=torch.cuda.is_available())
    valid_loader = dataset.make_loader(
        val_ids,
        transform=dataset.val_transform(),
        shuffle=False,
        batch_size=batch_size,
        workers=workers)
    return train_loader, valid_loader
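# dataset.TwoStreamBatchSampler is not shown here; it is assumed to behave
# like the sampler in the mean-teacher reference implementation, where every
# batch mixes a fixed number of labeled examples with unlabeled ones.
# A minimal sketch under that assumption (hypothetical, for illustration):

import numpy as np
from torch.utils.data.sampler import Sampler

class TwoStreamBatchSamplerSketch(Sampler):
    def __init__(self, labeled_ids, unlabeled_ids, batch_size, labeled_batch_size):
        self.labeled_ids = list(labeled_ids)
        self.unlabeled_ids = list(unlabeled_ids)
        self.labeled_bs = labeled_batch_size
        self.unlabeled_bs = batch_size - labeled_batch_size

    def __iter__(self):
        # reshuffle both streams each epoch, then zip them into mixed batches
        labeled = np.random.permutation(self.labeled_ids)
        unlabeled = np.random.permutation(self.unlabeled_ids)
        for i in range(len(self)):
            yield (list(labeled[i * self.labeled_bs:(i + 1) * self.labeled_bs]) +
                   list(unlabeled[i * self.unlabeled_bs:(i + 1) * self.unlabeled_bs]))

    def __len__(self):
        return min(len(self.labeled_ids) // self.labeled_bs,
                   len(self.unlabeled_ids) // self.unlabeled_bs)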
def preprocess(news, bi_cls):
    token_type_ids = []
    attention_mask = []
    input_ids = []
    news_contents = get_split(news)
    for news_content in news_contents:
        # prepend the classification prompt; 金錢 = "money",
        # 犯罪 = "crime", 犯人 = "criminal"
        if bi_cls:
            news_content = '[CLS]金錢[SEP]犯罪[SEP]' + news_content + '[SEP]'
        else:
            news_content = '[CLS]金錢[SEP]犯人[SEP]' + news_content + '[SEP]'
        tokens = tokenizer.tokenize(news_content)
        input_id = tokenizer.convert_tokens_to_ids(tokens)
        token_type_id = create_token_type(input_id)
        att_mask = create_mask(input_id)
        input_ids.append(input_id)
        token_type_ids.append(token_type_id)
        attention_mask.append(att_mask)
    # pad / truncate everything to BERT's 512-token limit
    pad = lambda x: pad_sequences(
        x, maxlen=512, dtype='long', truncating='post', padding='post')
    input_ids = pad(input_ids)
    token_type_ids = pad(token_type_ids)
    attention_mask = pad(attention_mask)
    return input_ids, token_type_ids, attention_mask
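# create_token_type and create_mask are called above but not defined in this
# snippet. One plausible implementation, assuming standard BERT-style inputs
# and that [SEP] has id 102 in the vocabulary (both are assumptions):

def create_token_type(input_id, sep_id=102):
    # segment 0 for the prompt up to and including the first [SEP],
    # segment 1 for everything after it
    token_type, segment = [], 0
    for token in input_id:
        token_type.append(segment)
        if token == sep_id and segment == 0:
            segment = 1
    return token_type

def create_mask(input_id):
    # attend to every real token; pad_sequences later appends zeros,
    # which double as masked-off positions
    return [1] * len(input_id)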
def predict(path, kind='student', batch_size=BATCH_SIZE, fold=-1, workers=8):
    with open(os.path.join(path, 'params.json'), 'r') as f:
        config = json.load(f)
    model_type = config['model']
    test_ids = dataset.get_test_ids()
    folds = list(range(5)) if fold == -1 else [fold]
    for fold in folds:
        print('processing fold', fold)
        model_path = os.path.join(path, f"model_{fold}.{kind}.pth")
        model = models.get_model(model_path, model_type=model_type)
        model.eval()
        print('loaded.')

        print('predicting val set')
        val_output = os.path.join(path, f"val_preds_fold{fold}.npy")
        _, val_ids = dataset.get_split(fold)
        predict_tta(model, val_ids, val_output, kind='val', upside_down=True)

        print('predicting test set')
        test_output = os.path.join(path, f"test_preds_fold{fold}.npy")
        predict_tta(model, test_ids, test_output, kind='test', upside_down=True)
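# predict_tta is defined elsewhere; a minimal sketch of flip-based test-time
# augmentation consistent with the upside_down flag above (tta_flip_predict
# is a hypothetical name; assumes NCHW tensors and a sigmoid output head):

import torch

def tta_flip_predict(model, batch, upside_down=False):
    with torch.no_grad():
        # average the plain prediction with the un-flipped prediction
        # on a horizontally mirrored input
        preds = [torch.sigmoid(model(batch)),
                 torch.sigmoid(model(torch.flip(batch, dims=[3]))).flip(dims=[3])]
        if upside_down:
            # optionally add a vertical flip as well
            preds.append(torch.sigmoid(model(torch.flip(batch, dims=[2]))).flip(dims=[2]))
        return torch.stack(preds).mean(dim=0)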
def main(write_submission=True):
    experiments = {
        # '../data/runs/resnet50-s224-v2': 1,
        # '../data/runs/bninception-s512': 1,
        '../data/runs/94499a78a0304845b4e885025560aba1': 1,
    }
    train_df = pd.read_csv(os.path.join(dataset.PATH, 'train.csv'))
    preds = np.zeros((11702, 28), dtype=np.float32)  # test images x classes
    for exp, weight in experiments.items():
        print(f'processing experiment {exp}')
        # load validation & test set predictions
        folds = [0]  # list(range(5))
        oof_pred = np.zeros((31072, 28), dtype=np.float32)  # train images x classes
        fold_preds = []
        for fold in folds:
            _, val_ids = dataset.get_split(fold)
            val_idx = np.array(train_df[train_df.Id.isin(val_ids)].index)
            oof_pred[val_idx] = np.load(
                os.path.join(exp, f"val_preds_fold{fold}.npy"))
            fold_preds.append(
                np.load(os.path.join(exp, f"test_preds_fold{fold}.npy")))
        # find a threshold per class using the out-of-fold predictions
        val_thresholds, val_f1 = evaluate(oof_pred)
        print('blended val_f1:', val_f1)
        # mean over folds
        fold_preds = np.sum(fold_preds, axis=0) / len(folds)
        # add the thresholded predictions, weighted, to the final array
        for klazz, klazz_thr in val_thresholds.items():
            preds[:, klazz] += weight * (fold_preds[:, klazz] > klazz_thr)
    # TODO: replace with majority voting
    final = np.round(1. * preds / sum(experiments.values()))
    np.save('preds.npy', final)
    if write_submission:
        output_csv = f'../submissions/{datetime.datetime.now().strftime("%Y%m%d%H%M")}.csv'
        print('writing to', output_csv)
        generate_submission(output_csv, final)
    print('done.')
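# evaluate() above returns per-class thresholds found on the out-of-fold
# predictions; a sketch of that idea with scikit-learn (find_class_thresholds
# is a hypothetical helper, and the one-hot label matrix `targets` is assumed):

import numpy as np
from sklearn.metrics import f1_score

def find_class_thresholds(targets, preds, grid=np.linspace(0.05, 0.95, 19)):
    # pick, independently per class, the threshold that maximizes F1
    thresholds = {}
    for klazz in range(preds.shape[1]):
        scores = [f1_score(targets[:, klazz], preds[:, klazz] > t) for t in grid]
        thresholds[klazz] = float(grid[int(np.argmax(scores))])
    return thresholds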
def eval_fold(experiment_path, fold):
    # predictions are loaded from the saved .npy, not recomputed from the model
    _, val_ids = dataset.get_split(fold)
    with open(os.path.join(experiment_path, f"val_preds_fold{fold}.npy"), "rb") as f:
        val_preds = np.load(f)
    train_df = pd.read_csv(os.path.join('../input', 'train.csv'))
    # build one-hot targets from the space-separated label lists
    val_target = {}
    for id_, label in train_df[['Id', 'Target']].values:
        y = [0] * dataset.NUM_CLASSES
        for i in label.split(' '):
            y[int(i)] = 1
        val_target[id_] = y
    targets = np.array([val_target[id_] for id_ in val_ids], np.uint8)
    valid_thresholds, valid_f1 = validation.f1_macro(targets, val_preds, debug=False)
    return valid_thresholds, valid_f1
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--name', type=str)
    arg('--jaccard-weight', default=0.25, type=float)
    arg('--device-ids', type=str, default='0',
        help='For example 0,1 to run on two GPUs')
    arg('--fold', type=int, help='fold', default=0)
    arg('--output-dir', default='../data/runs', help='checkpoint root')
    arg('--batch-size', type=int, default=32)
    arg('--iter-size', type=int, default=1)
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=4)
    arg('--seed', type=int, default=0)
    arg('--model', type=str, default=models.archs[0], choices=models.archs)
    arg('--loss', type=str, default='focal', choices=[
        'focal', 'lovasz', 'bjd', 'bce_jaccard', 'bce_dice', 'cos_dice', 'hinge'
    ])
    arg('--focal-gamma', type=float, default=.5)
    arg('--num-channels', type=int, default=3)
    arg('--weighted-sampler', action='store_true')
    arg('--ignore-empty-masks', action='store_true')
    arg('--remove-suspicious', action='store_true')
    arg('--resume', action='store_true')
    args = parser.parse_args()

    random.seed(args.seed)
    torch.manual_seed(args.seed)

    experiment = args.name if args.name else uuid.uuid4().hex
    output_dir = Path(args.output_dir) / experiment
    output_dir.mkdir(exist_ok=True, parents=True)
    output_dir.joinpath('params.json').write_text(
        json.dumps(vars(args), indent=True, sort_keys=True))

    # in case --resume is provided, the snapshot is loaded later
    model = models.get_model(None, args.model)
    # model = models.get_model(f"../data/runs/exp81/model_{args.fold}.pth", args.model)

    if torch.cuda.is_available():
        if args.device_ids:
            device_ids = list(map(int, args.device_ids.split(',')))
        else:
            device_ids = None
        model = nn.DataParallel(model, device_ids=device_ids).cuda()

    train_ids, val_ids = dataset.get_split(args.fold)
    cudnn.benchmark = True

    train_loader = dataset.make_loader(
        train_ids,
        num_channels=args.num_channels,
        transform=dataset.train_transform(),
        shuffle=True,
        weighted_sampling=args.weighted_sampler,
        ignore_empty_masks=args.ignore_empty_masks,
        remove_suspicious=args.remove_suspicious,
        batch_size=args.batch_size,
        workers=args.workers)
    valid_loader = dataset.make_loader(
        val_ids,
        num_channels=args.num_channels,
        transform=dataset.val_transform(),
        shuffle=False,
        batch_size=args.batch_size,
        workers=args.workers)

    # optimizer = Adam([p for p in model.parameters() if p.requires_grad], lr=args.lr)
    optimizer = Adam(model.parameters(), lr=args.lr)

    if args.loss == 'focal':
        loss = FocalLoss(args.focal_gamma)
    elif args.loss == 'lovasz':
        loss = LossLovasz()
    elif args.loss == 'bjd':
        loss = BCEDiceJaccardLoss({'bce': 0.25, 'jaccard': None, 'dice': 0.75})
    elif args.loss == 'bce_jaccard':
        loss = LossBinary(args.jaccard_weight)
    elif args.loss == 'bce_dice':
        import loss2
        bce_weight = 1
        dice_weight = 2
        loss = loss2.make_loss(bce_weight, dice_weight)
    elif args.loss == 'cos_dice':
        import loss2
        loss = loss2.make_cos_dice_loss()
    elif args.loss == 'hinge':
        loss = LossHinge()
    else:
        raise NotImplementedError

    validation = validation_binary
    scheduler = ReduceLROnPlateau(optimizer, verbose=True, min_lr=1e-7, factor=0.5)
    snapshot = utils.fold_snapshot(output_dir, args.fold) if args.resume else None

    utils.train(
        experiment=experiment,
        output_dir=output_dir,
        optimizer=optimizer,
        args=args,
        model=model,
        criterion=loss,
        scheduler=scheduler,
        train_loader=train_loader,
        valid_loader=valid_loader,
        validation=validation,
        fold=args.fold,
        batch_size=args.batch_size,
        n_epochs=args.n_epochs,
        snapshot=snapshot,
        iter_size=args.iter_size)
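# FocalLoss is imported from the project's loss module; a common binary
# formulation consistent with the FocalLoss(args.focal_gamma) call above
# (a sketch, not necessarily the project's exact implementation):

import torch
import torch.nn as nn
import torch.nn.functional as F

class BinaryFocalLossSketch(nn.Module):
    def __init__(self, gamma=0.5):
        super().__init__()
        self.gamma = gamma

    def forward(self, logits, targets):
        # per-pixel BCE, then down-weight easy examples by (1 - p_t)^gamma
        bce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
        p_t = torch.exp(-bce)  # model's probability for the true class
        return ((1 - p_t) ** self.gamma * bce).mean()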
def main(write_submission=True):
    # ensembles recorded from earlier submissions, named after their scores
    experiments_0861 = {
        '../data/runs/exp86/m2': 10,
        '../data/runs/exp81': 1,
        '../data/runs/exp77/m2': 1,
        '../data/runs/exp77/m3': 1,
        '../data/subm075/exp75/m3': 1,
        '../data/subm068': 3,
        '../data/subm066': 1,
        '../data/subm056': 1,
        '../data/subm055': 1,
        '../data/subm054': 1,
        '../data/subm052': 1,
        '../data/subm049': 1,
        '../data/subm048': 1,
        '../data/subm045': 1,
        '../data/subm036': 1,
        '../data/subm034': 1,
        '../data/subm033': 1,
        '../data/subm032': 1,
        '../data/subm031': 1,
        '../data/subm028': 1,
    }
    experiments_0860 = {
        '../data/runs/exp86/m1': 2,
        '../data/runs/exp77/m2': 1,
        '../data/runs/exp77/m3': 1,
        '../data/subm075/exp75/m1': 1,
        '../data/subm075/exp75/m2': 1,
        '../data/subm075/exp75/m3': 1,
        '../data/subm068': 3,
        '../data/subm049': 1,
        '../data/subm048': 1,
        '../data/subm045': 1,
        '../data/subm036': 1,
    }
    experiments_0859 = {
        '../data/runs/exp81': 2,
        '../data/runs/exp77/m2': 1,
        '../data/runs/exp77/m3': 1,
        '../data/subm075/exp75/m1': 1,
        '../data/subm075/exp75/m2': 1,
        '../data/subm075/exp75/m3': 1,
        '../data/subm068': 3,
        '../data/subm049': 1,
        '../data/subm048': 1,
        '../data/subm045': 1,
        '../data/subm036': 1,
    }
    experiments_0856 = {
        '../data/runs/exp77/m2': 1,
        '../data/runs/exp77/m3': 1,
        '../data/subm075/exp75/m1': 1,
        '../data/subm075/exp75/m2': 1,
        '../data/subm075/exp75/m3': 1,
        '../data/subm068': 3,
        '../data/subm049': 1,
        '../data/subm048': 1,
        '../data/subm045': 1,
        '../data/subm036': 1,
    }
    experiments__ = {
        '../data/runs/exp77': 1,
        '../data/subm075/exp75/m1': 1,
        '../data/subm075/exp75/m2': 1,
        '../data/subm075/exp75/m3': 1,
        '../data/subm068': 3,
        '../data/subm049': 1,
        '../data/subm048': 1,
        '../data/subm045': 1,
        '../data/subm036': 1,
    }
    experiments_ = {
        '../data/runs/exp75/m1': 1,
        '../data/runs/exp75/m2': 1,
        '../data/runs/exp75/m3': 1,
        '../data/subm068': 2,
        '../data/subm049': 1,
        '../data/subm048': 1,
        '../data/subm045': 1,
        '../data/subm036': 1,
    }
    experiments2_ = {
        '../data/subm068': 2,
        # '../data/subm066': 2,
        # '../data/subm055': 2,
        '../data/subm049': 1,
        '../data/subm048': 1,
        '../data/subm045': 1,
        '../data/subm036': 1,
    }
    experiments_old = {
        '../data/subm066': 6,
        '../data/subm055': 6,
        '../data/subm056': 1,
        '../data/subm054': 1,
        '../data/subm052': 1,
        '../data/subm049': 4,
        '../data/subm048': 2,
        '../data/subm045': 2,
        # '../data/subm043': 1,
        # '../data/subm040': 1,
        '../data/subm036': 2,
        '../data/subm034': 1,
        '../data/subm033': 1,
        '../data/subm032': 1,
        '../data/subm031': 1,
        # '../data/subm029': 1,
        '../data/subm028': 1,
        # '../data/subm026': 1,
        '../data/subm025': 1,
        # '../data/subm024': 1,
        '../data/subm023': 1,
        '../data/subm022': 1,
        # '../data/subm020': 1,
        # '../data/subm019': 1,
    }
    # NOTE: `experiments` was left empty in the original, which makes the
    # weighted averages below divide by zero; picking the best-scoring
    # recorded ensemble here is an assumption
    experiments = experiments_0861

    preds = np.zeros((18000, 101, 101, 1), dtype=np.float32)
    folds = [4]  # list(range(5))
    for fold in folds:
        print('processing fold', fold)
        # merge fold predictions for the val set (weighted average)
        val_preds = []
        for exp, weight in experiments.items():
            val_preds.append(
                weight * np.load(os.path.join(exp, f"val_preds_fold{fold}.npy")))
        val_preds = np.sum(val_preds, axis=0) / sum(experiments.values())

        # find the best IoU threshold on the validation masks
        _, filenames = dataset.get_split(fold)
        masks = np.array([load_train_mask(image_id) for image_id in filenames])
        thres = np.linspace(0.2, 0.6, 20)
        thres_ioc = [iou_metric_batch(masks, np.int32(val_preds > t)) for t in thres]
        best_thres_tta = thres[np.argmax(thres_ioc)]
        print(f"fold {fold} -- iou:", best_thres_tta, max(thres_ioc))

        # blend the test set predictions with the same weights
        fold_preds = []
        for exp, weight in experiments.items():
            fold_preds.append(
                weight * np.load(os.path.join(exp, f"test_preds_fold{fold}.npy")))
        fold_preds = np.sum(fold_preds, axis=0) / sum(experiments.values())
        fold_preds_thresholded = (fold_preds > best_thres_tta).astype(np.uint8)
        preds += fold_preds_thresholded
    # np.save('raw_preds.npy', preds / 5.0)

    # majority voting across folds
    final = np.round(1. * preds / len(folds)).astype(np.uint8)

    # post-processing: zeroing near-empty masks gave a .001 improvement
    # (.843 -> .844) at one point, but currently makes predictions worse,
    # hence disabled
    if False:
        for idx in range(final.shape[0]):
            amount = np.sum(final[idx])
            # find the exact threshold on the validation set using the blend
            if 0 < amount <= 10:
                final[idx] = 0

    if write_submission:
        output_csv = '../submissions/subm_089.csv'
        print('writing to', output_csv)
        generate_submission(output_csv, final)
    print('done.')
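# generate_submission writes run-length-encoded masks; the standard TGS-style
# encoder is shown below for reference (column-major order, 1-indexed runs;
# a sketch -- the project's own encoder may differ in details):

import numpy as np

def rle_encode(mask):
    pixels = mask.T.flatten()  # column-major, as the competition expects
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]  # convert (start, end) pairs to (start, length)
    return ' '.join(str(x) for x in runs)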
def main():
    args = build_train_args()
    random.seed(args.seed)
    torch.manual_seed(args.seed)

    experiment = args.name if args.name else uuid.uuid4().hex
    output_dir = Path(args.output_dir) / experiment
    output_dir.mkdir(exist_ok=True, parents=True)
    output_dir.joinpath('params.json').write_text(
        json.dumps(vars(args), indent=True, sort_keys=True))

    # --load-weights only restores model weights; --resume (handled below)
    # also loads the optimizer state
    initial_model_path = os.path.join(
        output_dir, f"model_{args.fold}.pth") if args.load_weights else None
    model = models.get_model(initial_model_path, args.model)

    train_ids, val_ids = dataset.get_split(args.fold)
    cudnn.benchmark = True

    train_loader = dataset.make_loader(
        train_ids,
        num_channels=args.num_channels,
        transform=dataset.train_transform(),
        shuffle=True,
        weighted_sampling=args.weighted_sampler,
        batch_size=args.batch_size,
        workers=args.workers)
    valid_loader = dataset.make_loader(
        val_ids,
        num_channels=args.num_channels,
        transform=dataset.val_transform(),
        shuffle=False,
        weighted_sampling=False,
        batch_size=args.batch_size,
        workers=args.workers)

    # (commented-out experiments from earlier runs: training the fc layer
    # only, per-layer learning rates, SGD with momentum, focal and F1 losses,
    # unfreezing layers mid-training, and adding an unfrozen fc2 to the
    # optimizer via optimizer.add_param_group)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    criterion = nn.BCEWithLogitsLoss().cuda()
    validation_fn = validation_multi
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    snapshot = utils.fold_snapshot(output_dir, args.fold) if args.resume else None
    device_ids = list(map(
        int, args.device_ids.split(','))) if args.device_ids else None
    wrapped_model = nn.DataParallel(model, device_ids=device_ids).cuda()

    utils.train(
        experiment=experiment,
        output_dir=output_dir,
        optimizer=optimizer,
        args=args,
        model=wrapped_model,
        criterion=criterion,
        scheduler=scheduler,
        train_loader=train_loader,
        valid_loader=valid_loader,
        validation_fn=validation_fn,
        fold=args.fold,
        batch_size=args.batch_size,
        n_epochs=args.n_epochs,
        snapshot=snapshot,
        iter_size=args.iter_size)
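# One of the commented-out experiments summarized above used per-layer
# learning rates; a cleaned-up sketch of that idea (model.fc is assumed to
# be the classifier head -- adapt the attribute name to the actual model):

def make_discriminative_optimizer(model, lr):
    fc_params = set(map(id, model.fc.parameters()))
    base_params = [p for p in model.parameters() if id(p) not in fc_params]
    return torch.optim.Adam([
        {'params': base_params, 'lr': lr * 0.1},      # backbone at a lower lr
        {'params': model.fc.parameters(), 'lr': lr},  # head at the full lr
    ])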
adding_noise = True
gaussian_std = 0.05
n_epochs = 200
report_each = 1200

train_transform = DualCompose([
    Resize(size=img_size),
    HorizontalFlip(),
    VerticalFlip(),
    ColorizationNormalize(),
])
valid_transform = DualCompose([
    Resize(size=img_size),
    ColorizationNormalize(),
])

fold = 0
train_file_names, val_file_names = get_split(fold=fold)
batch_size = 6
num_workers = 4

train_loader = DataLoader(
    dataset=ColorizationDataset(
        file_names=train_file_names, transform=train_transform, to_augment=True),
    shuffle=True,
    num_workers=num_workers,
    batch_size=batch_size)
dataset_length = len(train_loader)
valid_loader = DataLoader(
    dataset=ColorizationDataset(
        file_names=val_file_names, transform=valid_transform, to_augment=True),
    shuffle=False,
    num_workers=num_workers,
    batch_size=batch_size)
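# adding_noise and gaussian_std are consumed elsewhere in the training loop;
# a typical way to apply them (a sketch, assuming float image tensors):

import torch

def maybe_add_noise(inputs, adding_noise=True, gaussian_std=0.05):
    if adding_noise:
        inputs = inputs + torch.randn_like(inputs) * gaussian_std
    return inputs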
arg('--workers', type=int, default=8)
args = parser.parse_args()

with open(os.path.join(args.path, 'params.json'), 'r') as f:
    config = json.load(f)
model_type = config['model']

test_ids = dataset.get_test_ids()
folds = list(range(5)) if args.fold == -1 else [args.fold]
for fold in folds:
    print('processing fold', fold)
    model_path = os.path.join(args.path, f"model_{fold}.pth")
    model = models.get_model(model_path, model_type=model_type)
    model.eval()
    print('loaded.')

    print('predicting val set')
    val_output = os.path.join(args.path, f"val_preds_fold{fold}.npy")
    _, val_ids = dataset.get_split(fold)
    predict_tta(model, val_ids, val_output, kind='val', batch_size=args.batch_size)

    print('predicting test set')
    test_output = os.path.join(args.path, f"test_preds_fold{fold}.npy")
    predict_tta(model, test_ids, test_output, batch_size=args.batch_size)
cudnn.benchmark = True
optimizer = Adam([p for p in model.parameters() if p.requires_grad], lr=1e-6)
loss = LossLovasz()
validation = validation_binary
scheduler = ReduceLROnPlateau(optimizer, verbose=True, min_lr=1e-7,
                              factor=0.5, patience=5)

output_dir = Path('tmp')
output_dir.mkdir(exist_ok=True, parents=True)

train_ids, val_ids = dataset.get_split(fold)
train_loader = DataLoader(
    dataset=TGSDataset(
        train_ids, num_channels=3, transform=dataset.val_transform(), mode='train'),
    shuffle=True,
    num_workers=4,
    batch_size=24,
    sampler=None,
    pin_memory=torch.cuda.is_available())
val_loader = DataLoader(
    dataset=TGSDataset(
        val_ids, num_channels=3, transform=dataset.val_transform(), mode='train'),
    # the original snippet is truncated here; shuffle=False plus the same
    # worker and batch settings as train_loader are assumed
    shuffle=False,
    num_workers=4,
    batch_size=24,
    pin_memory=torch.cuda.is_available())