def loop_train(fold=0):
    """Train and validate one cross-validation fold on CutMix-wrapped data.

    Rows of the module-level ``train`` frame whose ``fold`` column equals
    *fold* form the validation split; every other row is used for training.
    After each epoch the metrics are passed to ``add_train_info``. Whenever
    the validation loss improves, the model weights are written to
    ``model_fold_{fold}.bin`` and a progress line is printed. The loop ends
    after 300 epochs, or earlier once the validation loss has failed to
    improve for more than 7 consecutive epochs.
    """
    ready_train_info(fold)

    fit_frame = train[train.fold != fold].reset_index(drop=True)
    holdout_frame = train[train.fold == fold].reset_index(drop=True)

    image_dir = "E:/kaggle_imgs/Plant-pathology-2020/images_224_224/"

    # --- training pipeline (CutMix applied with probability 0.999) ---
    fit_paths = [
        image_dir + image_id + ".png"
        for image_id in fit_frame.image_id.values.tolist()
    ]
    fit_aug = Utils.get_aug("train")
    fit_targets = fit_frame.CAT.values
    fit_dataset = ClassificationLoader(
        image_paths=fit_paths,
        targets=fit_targets,
        resize=None,
        augmentations=fit_aug,
    )
    fit_cutmix = CutMix(fit_dataset, num_class=4, beta=1.0, prob=0.999, num_mix=1)
    train_loader = torch.utils.data.DataLoader(
        fit_cutmix, batch_size=bs_train, num_workers=4, shuffle=True
    )

    # --- validation pipeline (same wrapper, but prob=0) ---
    holdout_paths = [
        image_dir + image_id + ".png"
        for image_id in holdout_frame.image_id.values.tolist()
    ]
    holdout_aug = Utils.get_aug("valid")
    holdout_targets = holdout_frame.CAT.values
    holdout_dataset = ClassificationLoader(
        image_paths=holdout_paths,
        targets=holdout_targets,
        resize=None,
        augmentations=holdout_aug,
    )
    holdout_cutmix = CutMix(holdout_dataset, num_class=4, beta=1.0, prob=0, num_mix=1)
    valid_loader = torch.utils.data.DataLoader(
        holdout_cutmix, batch_size=bs_valid, num_workers=4, shuffle=False
    )

    # --- model, optimizer, scheduler, engine ---
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = Utils.get_model("effinet").to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, threshold=1e-5, mode="min", verbose=True
    )
    engine = Engine(model, optimizer, device)

    best_loss = np.inf
    patience = 7
    stale_epochs = 0
    max_epochs = 300

    for epoch in range(max_epochs):
        train_loss, train_acc = engine.train(train_loader)
        valid_loss, valid_acc = engine.validate(valid_loader)
        scheduler.step(valid_loss)

        add_train_info(fold, train_acc, train_loss, valid_acc, valid_loss)

        # No improvement: count the stale epoch and possibly stop early.
        if not (valid_loss < best_loss):
            stale_epochs += 1
            if stale_epochs > patience:
                break
            continue

        # Improvement: checkpoint, log, and reset the stale counter.
        best_loss = valid_loss
        torch.save(model.state_dict(), f"model_fold_{fold}.bin")
        tm = datetime.datetime.now().strftime("%H:%M:%S")
        print(
            f"{tm}, fold={fold}, epoch={epoch}, train_loss={train_loss:.6f}, valid_loss={valid_loss:.6f}"
        )
        stale_epochs = 0

    print(f"fold={fold}, best val loss={best_loss}")
def loop_train(fold, model_name, weights, sel_weight):
    """Train and validate a two-class model on one cross-validation fold.

    The ``CAT`` column of the module-level ``train`` frame is collapsed into
    a binary target (1 where ``CAT > 0``, otherwise 0). Per-epoch metrics are
    recorded through ``history.add_train_info`` and printed. Whenever the
    validation loss improves, the weights are saved to
    ``model_fold_{fold}.bin`` and the epoch is stored in
    ``history.best_idx[fold]``. Training ends after 300 epochs, or earlier
    once the validation loss has not improved for more than 3 consecutive
    epochs.

    Args:
        fold: Value of the ``fold`` column selecting the validation rows;
            all other rows are used for training. Also used in the checkpoint
            file name and as the key into ``history.best_idx``.
        model_name: Not used by this function; kept so existing callers
            continue to work.
        weights: Forwarded unchanged to ``Engine`` as ``weights``.
        sel_weight: Identifier passed to ``history.initial_info`` and
            ``history.add_train_info``.
    """
    history.initial_info(sel_weight)

    train_df = train[train.fold != fold].reset_index(drop=True)
    valid_df = train[train.fold == fold].reset_index(drop=True)

    # Binary targets. The builtin ``int`` replaces ``np.int``: that alias was
    # deprecated in NumPy 1.20 and removed in 1.24, where it raises
    # AttributeError.
    train_tar = (train_df["CAT"] > 0).astype(int).values
    valid_tar = (valid_df["CAT"] > 0).astype(int).values

    image_dir = "E:/kaggle_imgs/Plant-pathology-2020/images_224_224/"

    # --- training data ---
    train_imgs = [
        image_dir + image_id + ".png"
        for image_id in train_df.image_id.values.tolist()
    ]
    train_aug = Utils.get_aug("train")
    train_dataset = ClassificationLoader(
        image_paths=train_imgs,
        targets=train_tar,
        resize=None,
        augmentations=train_aug,
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=bs_train, num_workers=4, shuffle=True
    )

    # --- validation data ---
    valid_imgs = [
        image_dir + image_id + ".png"
        for image_id in valid_df.image_id.values.tolist()
    ]
    valid_aug = Utils.get_aug("valid")
    valid_dataset = ClassificationLoader(
        image_paths=valid_imgs,
        targets=valid_tar,
        resize=None,
        augmentations=valid_aug,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=bs_valid, num_workers=4, shuffle=False
    )

    # --- model, optimizer, scheduler, engine ---
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = get_effinet(classes=2).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, threshold=1e-5, mode="min", verbose=True
    )
    engine = Engine(model, optimizer, device, classes=2, weights=weights)

    best_loss = np.inf
    early_stopping = 3
    early_stopping_cnt = 0
    EPOCH = 300

    for epoch in range(EPOCH):
        train_loss, train_acc = engine.train(train_loader)
        valid_loss, valid_acc, valid_labels, valid_preds = engine.validate(
            valid_loader
        )
        # The plateau scheduler is driven by the validation loss.
        scheduler.step(valid_loss)

        history.add_train_info(
            sel_weight,
            train_acc,
            train_loss,
            valid_acc,
            valid_loss,
            valid_labels,
            valid_preds,
        )

        tm = datetime.datetime.now().strftime("%H:%M:%S")
        print(
            f"{tm}, fold={fold}, epoch={epoch}, train_loss={train_loss:.4f}, valid_loss={valid_loss:.4f}, valid_acc={valid_acc:.4f}"
        )

        if valid_loss < best_loss:
            # New best: checkpoint and remember which epoch produced it.
            best_loss = valid_loss
            torch.save(model.state_dict(), f"model_fold_{fold}.bin")
            early_stopping_cnt = 0
            history.best_idx[fold] = epoch
        else:
            early_stopping_cnt += 1
            if early_stopping_cnt > early_stopping:
                break

    print(f"fold={fold}, best val loss={best_loss}")
def loop_train(fold, weights, sel_pos):
    """Train and validate a binary classifier on one cross-validation fold.

    The fold table is read from ``E:/kaggle_imgs/H2/data/train_fold.csv``.
    Its ``category`` column is collapsed into a binary target (1 where
    ``category > 0``, otherwise 0) and image paths come from ``tar_path``.
    Per-epoch metrics are recorded through ``history.add_train_info`` and
    printed. Whenever the validation loss improves, the weights are saved
    under ``E:/kaggle_imgs/H2/saved_models/`` with the fold and the current
    month/day in the file name. Training ends after ``EPOCH`` epochs, or
    earlier once the validation loss has not improved for
    ``early_stopping`` consecutive epochs.

    Args:
        fold: Value of the ``fold`` column selecting the validation rows;
            all other rows are used for training. Also used in the checkpoint
            file name.
        weights: Forwarded unchanged to ``Engine`` as ``weights``.
        sel_pos: Identifier passed to ``history.initial_info`` and
            ``history.add_train_info``.
    """
    train = pd.read_csv("E:/kaggle_imgs/H2/data/train_fold.csv")
    history.initial_info(sel_pos)

    # NOTE(review): the 100/80 row caps and the 3-epoch limit below look like
    # debugging leftovers (EPOCH originally carried a "#300" marker) --
    # confirm before a full training run.
    train_df = train[train.fold != fold].reset_index(drop=True)[:100]
    valid_df = train[train.fold == fold].reset_index(drop=True)[:80]

    # Binary targets are computed directly instead of being stored in a new
    # column: assigning onto the sliced frames can trigger pandas'
    # SettingWithCopyWarning. The builtin ``int`` replaces ``np.int``, which
    # was removed in NumPy 1.24.
    train_tar = (train_df["category"] > 0).astype(int).values
    valid_tar = (valid_df["category"] > 0).astype(int).values

    # --- training data ---
    train_imgs = train_df.tar_path.values.tolist()
    train_aug = Utils.get_aug("train")
    train_dataset = ClassificationLoader(
        image_paths=train_imgs,
        targets=train_tar,
        resize=None,
        augmentations=train_aug,
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=bs_train, num_workers=0, shuffle=True
    )

    # --- validation data ---
    valid_imgs = valid_df.tar_path.values.tolist()
    valid_aug = Utils.get_aug("valid")
    valid_dataset = ClassificationLoader(
        image_paths=valid_imgs,
        targets=valid_tar,
        resize=None,
        augmentations=valid_aug,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=bs_valid, num_workers=0, shuffle=False
    )

    # --- model, optimizer, scheduler, engine ---
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = get_model_effi_b4(classes=2).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, threshold=1e-5, mode="min", verbose=True
    )
    # NOTE(review): the model is built with classes=2 while Engine receives
    # classes=1 -- confirm this mismatch is intended.
    engine = Engine(model, optimizer, device, classes=1, weights=weights)

    best_loss = np.inf
    early_stopping = 3
    early_stopping_cnt = 0
    EPOCH = 3

    for epoch in range(EPOCH):
        train_loss, train_acc, train_labels, train_preds = engine.train(
            train_loader
        )
        valid_loss, valid_acc, valid_labels, valid_preds = engine.validate(
            valid_loader
        )
        # The plateau scheduler is driven by the validation loss.
        scheduler.step(valid_loss)

        history.add_train_info(
            sel_pos,
            train_acc,
            train_loss,
            train_labels,
            train_preds,
            valid_acc,
            valid_loss,
            valid_labels,
            valid_preds,
        )

        tm = datetime.datetime.now().strftime("%H:%M:%S")
        print(
            f"{tm}, fold={fold}, epoch={epoch}, train_loss={train_loss:.4f}, valid_loss={valid_loss:.4f}, valid_acc={valid_acc:.4f}"
        )

        if valid_loss < best_loss:
            best_loss = valid_loss
            # Month/day tag keeps checkpoints from different days apart.
            date_tag = datetime.datetime.now().strftime("%m%d")
            torch.save(
                model.state_dict(),
                f"E:/kaggle_imgs/H2/saved_models/model_fold_{fold}_{date_tag}.bin",
            )
            early_stopping_cnt = 0
        else:
            early_stopping_cnt += 1
            if early_stopping_cnt >= early_stopping:
                break

    print(f"fold={fold}, best val loss={best_loss}")