def train(fold):
    """Train ResNext101 on all folds except `fold`; validate on `fold`.

    Checkpoints the best model (by validation AUC) under MODEL_PATH and
    stops early once the AUC stops improving.
    """
    folds_df = pd.read_csv(TRAIN_FOLDS)
    train_df = folds_df[folds_df.kfold != fold].reset_index(drop=True)
    valid_df = folds_df[folds_df.kfold == fold].reset_index(drop=True)

    model = ResNext101_64x4d(pretrained="imagenet")
    model.to(DEVICE)

    def _image_paths(frame):
        # map each image name to its full .jpg path under the training dir
        return [
            os.path.join(TRAINING_DATA_PATH, name + ".jpg")
            for name in frame.image_name.values.tolist()
        ]

    train_targets = train_df.target.values
    valid_targets = valid_df.target.values

    train_loader = train_dataloader(images=_image_paths(train_df), targets=train_targets)
    valid_loader = valid_dataloader(images=_image_paths(valid_df), targets=valid_targets)

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    # the scheduler is stepped with the AUC, so the plateau detector is "max"
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, threshold=0.001, mode="max"
    )
    es = EarlyStopping(patience=3, mode="max")

    for epoch in range(EPOCHS):
        Engine.train(train_loader, model, optimizer, device=DEVICE)
        predictions, valid_loss = Engine.evaluate(valid_loader, model, device=DEVICE)
        auc = metrics.roc_auc_score(valid_targets, np.vstack(predictions).ravel())
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)
        es(auc, model, model_path=os.path.join(MODEL_PATH, f"model_fold_{fold}.bin"))
        if es.early_stop:
            print("Early stopping")
            break
def train(fold):
    """Train SEResnext50 on one CV fold with SGD + StepLR and early stopping.

    Defect fixed: the original called ``scheduler.step(acc)`` — for ``StepLR``
    the (deprecated) positional argument is an *epoch index*, so passing the
    accuracy silently corrupted the LR schedule.  ``StepLR`` decays purely by
    epoch count, so it is now stepped once per epoch with no argument.
    """
    # config
    df = pd.read_csv(data_path + "/train_fold.csv")
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # model
    model = SEResnext50_32x4d(pretrained=True)
    model = model.to(device)

    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9,
                                weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
    es = EarlyStopping(patience=5, mode="max")
    criterion = nn.CrossEntropyLoss().to(device)

    for epoch in range(epochs):  # loop over the dataset multiple times
        losses = AverageMeter()
        # the original iterated ``range(fold, fold + 1, 1)`` — a
        # single-iteration loop; load this fold's data directly instead
        train_loader, valid_loader = get_images_by_fold(df, fold)
        print("iter %d/5 load complete" % fold)

        tk0 = tqdm(train_loader, total=len(train_loader), disable=False)
        for i, data in enumerate(tk0):
            # move the whole batch dict onto the training device
            for key, value in data.items():
                data[key] = value.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(data["image"])
            loss = criterion(outputs, data["targets"])
            loss.backward()
            optimizer.step()

            # loss update
            losses.update(loss.item(), train_loader.batch_size)
            tk0.set_postfix(loss=losses.avg)

        # validation accuracy drives checkpointing and early stopping
        acc = acc_check(model, valid_loader, epoch)
        scheduler.step()  # fixed: StepLR takes no metric
        es(acc, model, model_path=save_path + f"/200712_acc_{acc}.bin")
        if es.early_stop:
            print("early stop")
            break
# the last layer model.fc = nn.Linear(512, 57) # If the model already exists we load it in. if os.path.isfile("models/detecting_common_element/neurabble.pth"): model.load_state_dict( torch.load("models/detecting_common_element/neurabble.pth") ) criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, patience=3, threshold=0.001, mode="max" ) es = EarlyStopping(patience=5, mode="max") device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Since we use early we the number for the range should # be pretty big in order to be sure that the model can # fit properly for epoch in range(50): # Engine is a utility class for training and evaluating the # model train_loss = Engine.train(trainloader, model, optimizer, device) valid_preds, valid_targets = Engine.evaluate(validloader, model, device) # Calculating the accuracy of our model category_pred = list() for i in range(len(valid_preds)):
def train(fold):
    """Train SEResnext50 on one fold of the SIIM-ISIC melanoma data.

    Fixes over the original:
    * ``scheduler.step(loss)`` ran after every batch on a
      ``ReduceLROnPlateau(mode="max")`` scheduler — treating the *training
      loss* as a score to maximise.  It now steps once per epoch on the
      validation AUC.
    * ``optimizer.zero_grad()`` / ``model.train()`` only ran when
      ``idx == 0``; gradients are now zeroed before every backward pass and
      the model is put in train mode once per epoch.
    * the training loader now shuffles (``shuffle=True``); validation stays
      ordered so predictions line up with ``valid_targets``.
    * the dangling ``print`` / ``(epoch)`` statement split across lines is
      repaired, and the unused ``count`` variable is removed.
    """
    training_data_path = "../input/siic-isic-224x224-images/train/"
    df = pd.read_csv("/kaggle/working/train_folds.csv")
    device = "cuda"
    epochs = 50
    train_bs = 32
    valid_bs = 16

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)
    valid_targets = df_valid.target.values

    model = SEResnext50_32x4d(pretrained="imagenet")
    model.to(device)

    # ImageNet normalisation statistics
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    transform_train = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=(-90, 90)),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])
    transform_valid = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    train_data = MelenomaDataset(training_data_path, df_train, transform_train)
    valid_data = MelenomaDataset(training_data_path, df_valid, transform_valid)
    train_loader = DataLoader(train_data, batch_size=train_bs, shuffle=True,
                              num_workers=4)
    valid_loader = DataLoader(valid_data, batch_size=valid_bs, shuffle=False,
                              num_workers=4)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, threshold=0.001, mode="max"
    )
    es = EarlyStopping(patience=5, mode="max")

    for epoch in range(epochs):
        model.train()
        tk0 = tqdm(train_loader, total=len(train_loader))
        losses = AverageMeter()
        for idx, pack in enumerate(tk0):
            optimizer.zero_grad()
            prediction = model(pack["image"].cuda())
            loss = weighted_cross_entropy_loss(prediction, pack["label"])
            loss.backward()
            optimizer.step()
            losses.update(loss.item(), train_loader.batch_size)
            tk0.set_postfix(loss=losses.avg)
        tk0.close()

        print(epoch)
        print(f"training_loss for {epoch} = {losses.avg}")
        loss_v, auc = validation(fold, valid_loader, valid_targets, model)
        print(f"validation_loss for {epoch}= {loss_v}")
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)  # plateau scheduler keyed on validation AUC
        es(auc, model, model_path=f"model_fold_{fold}.bin")
        if es.early_stop:
            print("Early stopping")
            break


def predict(fold):
    """Score the test set with the fold's saved checkpoint.

    Returns a flat numpy array of predictions, one per test image.
    """
    test_data_path = "../input/siic-isic-224x224-images/test/"
    df = pd.read_csv("../input/siim-isic-melanoma-classification/test.csv")
    device = "cuda"
    model_path = f"model_fold_{fold}.bin"

    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
    ])

    images = df.image_name.values.tolist()
    images = [os.path.join(test_data_path, i + ".png") for i in images]
    targets = np.zeros(len(images))  # dummy labels; the loader requires targets

    test_dataset = ClassificationLoader(
        image_paths=images,
        targets=targets,
        resize=None,
        augmentations=aug,
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=16, shuffle=False, num_workers=4
    )

    model = SEResnext50_32x4d(pretrained=None)
    model.load_state_dict(torch.load(model_path))
    model.to(device)

    predictions = Engine.predict(test_loader, model, device=device)
    return np.vstack(predictions).ravel()


if __name__ == '__main__':
    # train all five folds, then predict with each fold's checkpoint
    for _fold in range(5):
        train(_fold)
    for _fold in range(5):
        predict(_fold)
def train(fold):
    """Train/validate SEResnext50 on one CV fold using albumentations
    pipelines, with plateau LR scheduling and early stopping on AUC."""
    training_data_path = "../input/siic-isic-224x224-images/train/"
    df = pd.read_csv("/kaggle/working/train_folds.csv")
    device = "cuda"
    epochs = 50
    train_bs, valid_bs = 32, 16

    in_fold = df.kfold == fold
    df_train = df[~in_fold].reset_index(drop=True)
    df_valid = df[in_fold].reset_index(drop=True)

    model = SEResnext50_32x4d(pretrained="imagenet")
    model.to(device)

    # ImageNet normalisation statistics
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    train_aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
        albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1,
                                        rotate_limit=15),
        albumentations.Flip(p=0.5),
    ])
    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
    ])

    def _paths(frame):
        # image names -> full .png paths under the training directory
        return [
            os.path.join(training_data_path, name + ".png")
            for name in frame.image_name.values.tolist()
        ]

    train_targets = df_train.target.values
    valid_targets = df_valid.target.values

    def _make_loader(paths, targets, aug, batch_size, shuffle):
        dataset = ClassificationLoader(
            image_paths=paths, targets=targets, resize=None, augmentations=aug
        )
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle, num_workers=4
        )

    train_loader = _make_loader(_paths(df_train), train_targets, train_aug, train_bs, True)
    valid_loader = _make_loader(_paths(df_valid), valid_targets, valid_aug, valid_bs, False)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, threshold=0.001, mode="max"
    )
    es = EarlyStopping(patience=5, mode="max")

    for epoch in range(epochs):
        Engine.train(train_loader, model, optimizer, device=device)
        predictions, valid_loss = Engine.evaluate(valid_loader, model, device=device)
        auc = metrics.roc_auc_score(valid_targets, np.vstack(predictions).ravel())
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)
        es(auc, model, model_path=f"model_fold_{fold}.bin")
        if es.early_stop:
            print("Early stopping")
            break
def train():
    """Train CV_Model on a fixed random 70/30 split, early-stopping on
    validation AUC and checkpointing the best model to MODEL_PATH."""
    df = pd.read_csv(DATA_PATH + "images_labeled.csv")
    X_train, X_test, y_train, y_test = train_test_split(
        df.image.values, df.label.values,
        test_size=0.3, random_state=42, shuffle=True,
    )

    train_aug = albumentations.Compose([
        albumentations.Normalize(mean=MEAN, std=STD, max_pixel_value=255.0,
                                 always_apply=True)
    ])
    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean=MEAN, std=STD, max_pixel_value=255.0,
                                 always_apply=True)
    ])

    def _dataset(filenames, targets, aug):
        # resolve bare filenames against the data directory
        paths = [os.path.join(DATA_PATH, name) for name in filenames]
        return ClassificationDataset(
            image_paths=paths, targets=targets, resize=None, augmentations=aug
        )

    train_loader = torch.utils.data.DataLoader(
        _dataset(X_train, y_train, train_aug),
        batch_size=TRAIN_BS, shuffle=True, num_workers=4,
    )
    valid_loader = torch.utils.data.DataLoader(
        _dataset(X_test, y_test, valid_aug),
        batch_size=VALID_BS, shuffle=False, num_workers=4,
    )

    model = CV_Model(pretrained="imagenet")
    model.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, mode="max"
    )
    es = EarlyStopping(patience=5, mode="max")

    for epoch in range(EPOCHS):
        engine = Engine(model=model, optimizer=optimizer, device=DEVICE)
        engine.train(train_loader)
        scores = np.vstack(engine.predict(valid_loader)).ravel()
        auc = metrics.roc_auc_score(y_test, scores)
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)
        es(auc, model, model_path=os.path.join(MODEL_PATH, "model.bin"))
        if es.early_stop:
            print("Early stopping")
            break
def train(fold):
    """Train SEResNext on one fold; data paths are read interactively.

    Fixes over the original:
    * validation image names were built from ``df_train`` (train/valid leak
      and a length mismatch with ``valid_targets``) — now from ``df_valid``.
    * normalisation mean was ``(0.485, 0.456, 0.225)``; the ImageNet mean is
      ``(0.485, 0.456, 0.406)``.
    * apex ``opt_level`` was the string ``'01'`` (zero-one); the valid level
      is ``'O1'`` (capital letter O).
    * evaluation ran on ``train_loader``; it now runs on ``valid_loader`` so
      the AUC is computed against matching validation targets.
    * ``np.vstack((predictions).ravel())`` had a misplaced paren — now
      ``np.vstack(predictions).ravel()``.
    """
    # train image path
    training_data_path = input("Enter the train data path (resized image): ")
    # csv data path that was created from folds
    fold_csv_path = input("Enter the train_fold.csv file path: ")
    df = pd.read_csv(fold_csv_path)
    model_path = "model/"
    device = "cuda"
    epochs = 30
    train_batch_size = 32
    valid_batch_size = 16

    # ImageNet normalisation statistics (mean fixed: 0.406, not 0.225)
    mean = (0.485, 0.456, 0.406)
    standard_deviation = (0.229, 0.224, 0.225)

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    # normalize images
    train_aug = albumentations.Compose([
        albumentations.Normalize(mean, std=standard_deviation,
                                 max_pixel_value=255.0, always_apply=True)
    ])
    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean, std=standard_deviation,
                                 max_pixel_value=255.0, always_apply=True)
    ])

    # train image mapping
    train_images = df_train.image_name.values.tolist()
    train_images = [os.path.join(training_data_path, i + ".jpg") for i in train_images]
    train_targets = df_train.target.values

    valid_images = df_valid.image_name.values.tolist()  # fixed: was df_train
    valid_images = [os.path.join(training_data_path, i + ".jpg") for i in valid_images]
    valid_targets = df_valid.target.values

    train_dataset = ClassificationLoader(image_paths=train_images,
                                         targets=train_targets,
                                         resize=None,
                                         augmentations=train_aug)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_batch_size,
                                               shuffle=True, num_workers=4)
    valid_dataset = ClassificationLoader(image_paths=valid_images,
                                         targets=valid_targets,
                                         resize=None,
                                         augmentations=valid_aug)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batch_size,
                                               shuffle=False, num_workers=4)

    model = SEResNext(pretrained='imagenet')
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # dynamic learning-rate reduction keyed on validation AUC
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=4, mode='max'
    )
    # apex automatic mixed precision (opt_level fixed to 'O1')
    model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)
    es = EarlyStopping(patience=5, mode='max')

    for epoch in range(epochs):
        training_loss = Engine.train(train_loader, model, optimizer, device, fp16=True)
        # fixed: evaluate the held-out fold, not the training loader
        predictions, valid_loss = Engine.evaluate(valid_loader, model, optimizer, device)
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        scheduler.step(auc)
        print(f"epoch = {epoch}, auc= {auc}")
        es(auc, model, os.path.join(model_path, f"model{fold}.bin"))
        if es.early_stop:
            print("Early Stopping")
            break
def train(fold):
    """Train SEResNext50 (apex fp16) on one fold of the melanoma data.

    Fix: ``Engine.evaluate`` was fed ``train_loader``, so the AUC compared
    train-set predictions against ``valid_targets`` (wrong rows, likely a
    shape mismatch too); evaluation now runs on ``valid_loader``.
    """
    training_data_path = "/home/abhishek/workspace/melanoma/input/jpeg/train224/"
    model_path = "/home/abhishek/workspace/melanoma-deep-learning"
    df = pd.read_csv("/home/abhishek/workspace/melanoma/input/train_folds.csv")
    device = "cuda"
    epochs = 50
    train_bs = 32
    valid_bs = 16
    # ImageNet normalisation statistics
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    train_aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
    ])
    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
    ])

    train_images = df_train.image_name.values.tolist()
    train_images = [os.path.join(training_data_path, i + ".jpg") for i in train_images]
    train_targets = df_train.target.values

    valid_images = df_valid.image_name.values.tolist()
    valid_images = [os.path.join(training_data_path, i + ".jpg") for i in valid_images]
    valid_targets = df_valid.target.values

    train_dataset = ClassificationLoader(image_paths=train_images,
                                         targets=train_targets,
                                         resize=None,
                                         augmentations=train_aug)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=train_bs,
                                               shuffle=True, num_workers=4)
    valid_dataset = ClassificationLoader(image_paths=valid_images,
                                         targets=valid_targets,
                                         resize=None,
                                         augmentations=valid_aug)
    valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=valid_bs,
                                               shuffle=False, num_workers=4)

    model = SEResNext50_32x4d(pretrained="imagenet")
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3,
                                                           mode="max")
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
    es = EarlyStopping(patience=5, mode="max")

    for epoch in range(epochs):
        training_loss = Engine.train(train_loader, model, optimizer, device, fp16=True)
        # fixed: evaluate on the held-out fold, not the training loader
        predictions, valid_loss = Engine.evaluate(valid_loader, model, optimizer, device)
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        scheduler.step(auc)
        print(f"epoch={epoch}, auc={auc}")
        es(auc, model, os.path.join(model_path, f"model{fold}.bin"))
        if es.early_stop:
            print("early stopping")
            break
def train(fold):
    """Train SEResNext50 (apex fp16) on one fold.

    Fixes over the original:
    * the validation DataLoader used ``shuffle=True`` — the AUC was computed
      against ``valid_targets`` in the *original* row order, so the score was
      meaningless (the code's own comment says validation must not be
      shuffled); validation is now unshuffled.
    * evaluation ran on ``train_loader``; it now runs on ``valid_loader``.
    * apex ``opt_level`` was ``"01"`` (zero-one) instead of ``"O1"``.
    * ``ClassificationLoader`` was called with ``augmentation=``; every other
      variant in this file uses ``augmentations=`` — made consistent.
    """
    training_data_path = ''
    model_path = ""
    df = pd.read_csv("/.../train_folds.csv")
    device = "cuda"
    epochs = 50
    train_bs = 32  # train batch size
    valid_bs = 16

    # normalize image pixel values; these statistics suit this backbone
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    # reset_index(drop=True) discards the previous index entirely
    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    # image augmentation pipelines
    train_aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
    ])
    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
    ])

    train_images = df_train.image_name.values.tolist()
    train_images = [os.path.join(training_data_path, i + '.jpg') for i in train_images]
    train_targets = df_train.target.values

    valid_images = df_valid.image_name.values.tolist()
    valid_images = [os.path.join(training_data_path, i + '.jpg') for i in valid_images]
    valid_targets = df_valid.target.values

    train_dataset = ClassificationLoader(image_paths=train_images,
                                         targets=train_targets,
                                         resize=None,
                                         augmentations=train_aug)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=train_bs,
                                               shuffle=True, num_workers=4)
    valid_dataset = ClassificationLoader(image_paths=valid_images,
                                         targets=valid_targets,
                                         resize=None,
                                         augmentations=valid_aug)
    # fixed: never shuffle validation — predictions must align with valid_targets
    valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=valid_bs,
                                               shuffle=False, num_workers=4)

    model = SEResNext50_32x4d(pretrained='imagenet')
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # reduce LR when validation AUC plateaus ("max": higher AUC is better)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3,
                                                           mode="max")
    # apex mixed precision: trains faster with less memory (opt_level "O1")
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)
    es = EarlyStopping(patience=5, mode="max")

    for epoch in range(epochs):
        training_loss = Engine.train(train_loader, model, optimizer, device, fp16=True)
        predictions, valid_loss = Engine.evaluate(valid_loader, model, optimizer, device)
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        scheduler.step(auc)
        print("epoch={}, auc={}".format(epoch, auc))
        es(auc, model, os.path.join(model_path, f"model{fold}.bin"))
        if es.early_stop:
            print('early stopping')
            break
def train(gpu, args):
    """Per-process DDP training entry point: trains all 10 folds on ``gpu``.

    Fix: ``dist.init_process_group`` and the manual seed were executed inside
    the per-fold loop; re-initialising an already-initialised process group
    raises a RuntimeError on the second fold.  Process-group setup and
    seeding now run once, before iterating folds.
    """
    rank = args.nr * args.gpus + gpu
    dist.init_process_group(backend='nccl', init_method='env://',
                            world_size=args.world_size, rank=rank)
    torch.manual_seed(0)

    for fold in range(10):
        training_data_path = ""
        model_path = ""
        df = pd.read_csv("")
        epochs = 50
        train_bs = 32
        valid_bs = 16
        mean = (0.485, 0.456, 0.406)
        std = (0.229, 0.224, 0.225)
        device = "cuda"

        df_train = df[df.kfold != fold].reset_index(drop=True)
        df_valid = df[df.kfold == fold].reset_index(drop=True)

        train_aug = albumentations.Compose([
            albumentations.Normalize(mean, std, max_pixel_value=255.0,
                                     always_apply=True),
            albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1,
                                            rotate_limit=15),
            albumentations.Flip(p=0.5),
            albumentations.RandomRotate90(p=0.5)
        ])
        valid_aug = albumentations.Compose([
            albumentations.Normalize(mean, std, max_pixel_value=255.0,
                                     always_apply=True),
        ])

        train_images = df_train.image_name.values.tolist()
        train_images = [os.path.join(training_data_path, i + ".jpg") for i in train_images]
        train_targets = df_train.target.values

        valid_images = df_valid.image_name.values.tolist()
        valid_images = [os.path.join(training_data_path, i + ".jpg") for i in valid_images]
        valid_targets = df_valid.target.values

        model = SEResNext50_32x4d(pretrained="imagenet", gpu=gpu)
        torch.cuda.set_device(gpu)
        model.cuda(gpu)

        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3,
                                                               mode="max")
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        model = DDP(model)

        train_dataset = ClassificationDataset(image_paths=train_images,
                                              targets=train_targets,
                                              resize=None,
                                              augmentations=train_aug)
        # each replica sees a disjoint shard of the training data
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset, num_replicas=args.world_size, rank=rank)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=train_bs,
                                                   shuffle=False,
                                                   num_workers=0,
                                                   pin_memory=True,
                                                   sampler=train_sampler)

        valid_dataset = ClassificationDataset(image_paths=valid_images,
                                              targets=valid_targets,
                                              resize=None,
                                              augmentations=valid_aug)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size=valid_bs,
            shuffle=False,
            drop_last=False,
            num_workers=0,
            pin_memory=True,
        )

        es = EarlyStopping(patience=5, mode="max")
        for epoch in range(epochs):
            training_loss = Engine.train(train_loader, model, optimizer,
                                         scheduler=scheduler, fp16=True)
            model.to(device)
            predictions, valid_loss = Engine.evaluate(valid_loader, model,
                                                      device=device)
            predictions = np.vstack(predictions).ravel()
            auc = metrics.roc_auc_score(valid_targets, predictions)
            scheduler.step(auc)
            print(f"epoch={epoch}, auc={auc}")
            es(auc, model, os.path.join(model_path, f"model{fold}.bin"))
            if es.early_stop:
                print("early stopping")
                break
def train(fold):
    """Train SEResNext50 on one fold (CPU config).

    Fixes over the original:
    * ``val_targets`` was taken from ``df_train`` — a length/order mismatch
      with the validation predictions; it now comes from ``df_val``.
    * the evaluation step mixed the class-level ``Engine`` API with an
      undefined lowercase ``engine`` instance; validation loss and
      predictions now both come from ``Engine.evaluate``.
    * ``EarlyStopping`` received the bare checkpoint *directory*; a per-fold
      filename is now appended.
    * the training loader now shuffles each epoch (it was ``shuffle=False``).
    """
    training_data_path = "/content/train_images/"
    df = pd.read_csv("train_folds.csv")
    model_path = "/content/checkpoints/"
    device = "cpu"
    epochs = 10
    train_bs = 32
    val_bs = 16
    # ImageNet normalisation statistics
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_val = df[df.kfold == fold].reset_index(drop=True)

    train_aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255, always_apply=True)
    ])
    val_aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255, always_apply=True)
    ])

    train_images = df_train.image_name.values.tolist()
    train_images = [os.path.join(training_data_path, i + ".jpg") for i in train_images]
    train_targets = df_train.target.values

    val_images = df_val.image_name.values.tolist()
    val_images = [os.path.join(training_data_path, i + ".jpg") for i in val_images]
    val_targets = df_val.target.values  # fixed: was df_train.target.values

    train_dataset = ClassificationLoader(
        image_paths=train_images,
        targets=train_targets,
        resize=None,
        augmentations=train_aug
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=train_bs, shuffle=True, num_workers=4
    )
    val_dataset = ClassificationLoader(
        image_paths=val_images,
        targets=val_targets,
        resize=None,
        augmentations=val_aug
    )
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=val_bs, num_workers=4
    )

    model = SEResNext50_32x4d(pretrained='imagenet')
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, mode="max"
    )
    es = EarlyStopping(patience=5, mode="max")

    for epoch in range(epochs):
        training_loss = Engine.train(train_loader, model, optimizer, device=device)
        predictions, val_loss = Engine.evaluate(val_loader, model, device=device)
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(val_targets, predictions)
        scheduler.step(auc)
        print(f"epoch = {epoch}, auc = {auc}")
        es(auc, model, model_path=os.path.join(model_path, f"model_fold_{fold}.bin"))
        if es.early_stop:
            print("early stopping")
            break
def train(fold):
    """Train one fold with wandb tracking and optional metadata/SWA/fp16.

    Loads a YAML config (path from CLI args) into the wandb run config,
    optionally merges positive-only supplement data into the training set,
    trains with RAdam + ReduceLROnPlateau, early-stops on validation AUC,
    and finally re-evaluates the best checkpoint(s).
    """
    args = get_args()
    with open(args.config) as fh:
        yaml_config = yaml.load(fh, Loader=yaml.FullLoader)

    wandb.init(
        project="siim2020",
        entity="siim_melanoma",
        name=f"2017-2018-rexnet-test-{fold}",
    )
    config = wandb.config  # initialize run config from the YAML file
    config.update(yaml_config)

    device = config.device
    model_path = config.model_path.format(fold)
    seed_everything(config.seed)

    df = pd.read_csv(config.train_csv_fold)
    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_train["image_name"] = config.training_data_path + df_train["image_name"] + ".jpg"

    if config.supplement_data["use_supplement"]:
        # extend training with positive-only supplement records (even tfrecords)
        print(f"training shape before merge {df_train.shape}")
        df_supplement = pd.read_csv(config.supplement_data["csv_file"])
        df_supplement = df_supplement[df_supplement["tfrecord"] % 2 == 0]
        df_supplement = df_supplement[df_supplement["target"] == 1]
        df_supplement["image_name"] = (
            config.supplement_data["file_path"] + df_supplement["image_name"] + ".jpg"
        )
        df_train = pd.concat([df_train, df_supplement]).reset_index(drop=True)
        df_train = df_train.sample(
            frac=1, random_state=config.seed).reset_index(drop=True)
        del df_supplement
        print(f"training shape after merge {df_train.shape}")

    df_valid = df[df.kfold == fold].reset_index(drop=True)
    df_valid["image_name"] = config.training_data_path + df_valid["image_name"] + ".jpg"

    if config.use_metadata:
        df_train, meta_features = get_meta_feature(df_train)
        df_valid, _ = get_meta_feature(df_valid)
    else:
        meta_features = None

    model = get_model(
        config.model_backbone,
        config.model_name,
        config.num_classes,
        config.input_size,
        config.use_metadata,
        meta_features,
    )
    model = model.to(config.device)
    print("watching model")
    wandb.watch(model, log="all")

    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    train_aug = albumentations.Compose([
        AdvancedHairAugmentation(hairs_folder="../input/melanoma-hairs/"),
        # disabled in this run:
        # albumentations.augmentations.transforms.CenterCrop(64, 64, p=0.8),
        albumentations.augmentations.transforms.RandomBrightnessContrast(),
        albumentations.augmentations.transforms.HueSaturationValue(),
        # Microscope(p=0.4),
        albumentations.augmentations.transforms.RandomResizedCrop(
            config.input_size, config.input_size, scale=(0.7, 1.0), p=0.4),
        albumentations.augmentations.transforms.VerticalFlip(p=0.4),
        albumentations.augmentations.transforms.Cutout(p=0.3),
        albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1,
                                        rotate_limit=15),
        albumentations.Flip(p=0.5),
        RandomAugMix(severity=7, width=7, alpha=5, p=0.3),
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
    ])
    valid_aug = albumentations.Compose([
        albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
    ])

    # image_name columns already hold full paths (prefixed above)
    train_images = df_train.image_name.values.tolist()
    train_targets = df_train.target.values
    valid_images = df_valid.image_name.values.tolist()
    valid_targets = df_valid.target.values

    train_dataset = ClassificationDataset(
        image_paths=train_images,
        targets=train_targets,
        resize=None,
        augmentations=train_aug,
        meta_features=meta_features,
        df_meta_features=df_train,
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.batch_size,
        num_workers=1,
        pin_memory=True,
        shuffle=True,
        drop_last=True,
    )
    valid_dataset = ClassificationDataset(
        image_paths=valid_images,
        targets=valid_targets,
        resize=None,
        augmentations=valid_aug,
        meta_features=meta_features,
        df_meta_features=df_valid,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.test_batch_size,
        shuffle=False,
        num_workers=1,
        pin_memory=True,
    )

    optimizer = RAdam(model.parameters(), lr=config.lr)
    if config.swa["use_swa"]:
        optimizer = SWA(optimizer, swa_start=12, swa_freq=1)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=2, threshold=0.0001, mode="max"
    )
    es = EarlyStopping(patience=6, mode="max")

    if config.fp16:
        print("************* using fp16 *************")
        scaler = GradScaler()
    else:
        scaler = False

    for epoch in range(config.epochs):
        train_loss = Engine.train(
            train_loader,
            model,
            optimizer,
            device=config.device,
            wandb=wandb,
            accumulation_steps=config.accumulation_steps,
            fp16=config.fp16,
            scaler=scaler,
        )
        predictions, valid_loss = Engine.evaluate(
            valid_loader,
            model,
            device=config.device,
            wandb=wandb,
            epoch=epoch,
            upload_image=False,
            use_sigmoid=True,
        )
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        print(f"Epoch = {epoch}, AUC = {auc}")
        wandb.log({"valid_auc": auc})
        scheduler.step(auc)
        es(auc, model, model_path=model_path)
        if es.early_stop:
            print("Early stopping")
            break

    if config.swa["use_swa"]:
        print("saving the model using SWA")
        optimizer.swap_swa_sgd()
        torch.save(model.state_dict(), config.swa["model_path"].format(fold))

    evaluate_for_best_epoch(
        fold,
        model_path,
        config.device,
        valid_loader,
        config.model_name,
        valid_targets,
        "final",
        meta_features=meta_features,
    )
    if config.swa["use_swa"]:
        model_path = config.swa["model_path"].format(fold)
        evaluate_for_best_epoch(
            fold,
            model_path,
            config.device,
            valid_loader,
            config.model_name,
            valid_targets,
            "swa",
            meta_features=meta_features,
        )
def train(fold):
    """Train SEResNext50 on one fold of the melanoma data.

    Fixes over the original:
    * ``albumentations.Compose`` was given a bare transform; it expects a
      *list* of transforms.
    * train targets read ``df_train.targets`` (the column is ``target``) and
      valid image names were joined as ``i + "jpg"`` (missing the dot).
    * the validation dataset was built with ``targets=train_targets``.
    * the model was never moved to the device although ``device="cuda"`` and
      the engine is called with that device.
    * the validation loader used ``train_bs`` while ``valid_bs`` sat unused,
      and evaluation ran on ``train_loader`` — both corrected; the training
      loader now shuffles.
    * ``EarlyStopping`` now receives a per-fold checkpoint file instead of
      the bare models directory.

    NOTE(review): ``Engine.train(..., fp16=True)`` is kept even though the
    apex ``amp.initialize`` call is commented out, matching the original —
    confirm the Engine tolerates fp16 without amp.
    """
    training_data_path = "/home/sushi/code/Kaggle/Melanoma-Detection-/input/jpeg/train224"
    df = pd.read_csv(
        "/home/sushi/code/Kaggle/Melanoma-Detection-/input/train_folds.csv")
    model_path = "/home/sushi/code/Kaggle/Melanoma-Detection-/models"
    device = "cuda"
    epochs = 50
    train_bs = 32
    valid_bs = 16

    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)

    # fixed: Compose takes a list of transforms
    train_aug = albumentations.Compose([albumentations.Normalize(always_apply=True)])
    valid_aug = albumentations.Compose([albumentations.Normalize(always_apply=True)])

    train_images = df_train.image_name.values.tolist()
    train_images = [os.path.join(training_data_path, i + ".jpg") for i in train_images]
    train_targets = df_train.target.values  # fixed: column is `target`

    valid_images = df_valid.image_name.values.tolist()
    valid_images = [os.path.join(training_data_path, i + ".jpg") for i in valid_images]
    valid_targets = df_valid.target.values

    train_dataset = ClassificationLoader(image_paths=train_images,
                                         targets=train_targets,
                                         resize=None,
                                         augmentations=train_aug)
    valid_dataset = ClassificationLoader(image_paths=valid_images,
                                         targets=valid_targets,  # fixed
                                         resize=None,
                                         augmentations=valid_aug)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_bs,
                                               shuffle=True,
                                               num_workers=4)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_bs,  # fixed
                                               shuffle=False,
                                               num_workers=4)

    model = SEResNext50_32x4d(pretrained="imagenet")
    model.to(device)  # fixed: model must live on the training device

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3,
                                                           mode="max")
    es = EarlyStopping(patience=5, mode="max")

    for epoch in range(epochs):
        training_loss = Engine.train(train_loader, model, optimizer, device, fp16=True)
        # fixed: evaluate on the held-out fold, not the training loader
        predictions, valid_loss = Engine.evaluate(valid_loader, model, optimizer, device)
        predictions = np.vstack(predictions).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        scheduler.step(auc)
        print(f"epoch={epoch}, auc:{auc}")
        es(auc, model, os.path.join(model_path, f"model{fold}.bin"))
        if es.early_stop:
            print("early stopping")
            break