def predict(fold, model_name):
    """Predict on a small sample with the fold's checkpoint and export a trace.

    The first 10 rows of ``train_fold.csv`` are used as input. Weights are
    read from ``./model_fold_{fold}.bin``. After prediction the model is moved
    to the CPU, traced with a random ``(1, 3, 224, 224)`` tensor and written
    under ``E:/temp/saved_models/``.

    ``bs_test`` is read from module scope.

    Args:
        fold: Fold identifier used in the checkpoint and trace file names.
        model_name: Not used by this function; kept so existing callers work.

    Returns:
        The batches returned by ``engine.predict`` stacked with ``np.vstack``.
    """
    df = pd.read_csv(
        "E:/kaggle_imgs/Plant-pathology-2020/Data/train_fold.csv"
    )[0:10]
    device = "cuda" if torch.cuda.is_available() else "cpu"

    imgs = df.image_id.values.tolist()
    path = "E:/kaggle_imgs/Plant-pathology-2020/images_224_224/"
    test_imgs = [path + file + ".png" for file in imgs]

    utils = Utils(mode="test")
    test_aug = utils.get_aug()
    # All-zero placeholder passed as ``targets``; no real labels are used here.
    test_tar = np.zeros((len(imgs), 4))
    test_dataset = ClassificationLoader(
        image_paths=test_imgs,
        targets=test_tar,
        resize=None,
        augmentations=test_aug,
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=bs_test, num_workers=4, shuffle=False
    )

    model = get_effinet(classes=2)
    model_save_path = f"./model_fold_{fold}.bin"
    # map_location lets a checkpoint saved from a CUDA model load on a
    # CPU-only host, which the device fallback above is meant to support.
    state_dict = torch.load(model_save_path, map_location="cpu")
    model.load_state_dict(state_dict)
    model = model.to(device)

    engine = Engine(model, None, device, classes=2, weights=None)
    preds = engine.predict(test_loader)
    preds = np.vstack(preds)

    # Export a TorchScript trace (intended for loading from C++).
    sample = torch.rand(1, 3, 224, 224)
    model.to("cpu")
    # Tracing records the ops of the current mode, so force inference mode
    # instead of relying on whatever state engine.predict left the model in.
    model.eval()
    traced_script_module = torch.jit.trace(model, sample)
    traced_script_module.save(
        "E:/temp/saved_models/" + f"/traced_1015_fold_{fold}.pt"
    )
    return preds
def predict(fold=0):
    """Predict on the competition test set with the fold's checkpoint.

    Image ids come from ``test.csv``. Weights are read from
    ``./model_fold_{fold}.bin``.

    ``model_name`` and ``bs_test`` are not parameters; they are read from
    module scope.

    Args:
        fold: Fold identifier used in the checkpoint file name.

    Returns:
        The batches returned by ``engine.predict`` stacked with ``np.vstack``.
    """
    df = pd.read_csv("../input/plant-pathology-2020-fgvc7/test.csv")
    device = "cuda" if torch.cuda.is_available() else "cpu"

    imgs = df.image_id.values.tolist()
    path = "../input/plant-images-224-224/"
    test_imgs = [path + file + ".png" for file in imgs]

    test_aug = Utils.get_aug("test")
    # All-zero placeholder passed as ``targets``; no real labels are used here.
    test_tar = np.zeros((len(imgs), 4))
    test_dataset = ClassificationLoader(
        image_paths=test_imgs,
        targets=test_tar,
        resize=None,
        augmentations=test_aug,
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=bs_test, num_workers=0, shuffle=False
    )

    model = get_model(model_name)
    model_save_path = f"./model_fold_{fold}.bin"
    # map_location lets a checkpoint saved from a CUDA model load on a
    # CPU-only host, which the device fallback above is meant to support.
    state_dict = torch.load(model_save_path, map_location="cpu")
    model.load_state_dict(state_dict)
    model = model.to(device)

    engine = Engine(model, None, device)
    preds = engine.predict(test_loader)
    preds = np.vstack(preds)
    return preds
def predict(fold):
    """Predict on a small sample with the fold's checkpoint and export a trace.

    The first 10 rows of the module-level ``train`` frame are used as input.
    The checkpoint path embeds today's date (``%m%d``), so only a checkpoint
    saved with the same date tag will be found.

    ``train``, ``bs_valid`` and ``model_name`` are read from module scope.

    Args:
        fold: Fold identifier used in the checkpoint and trace file names.

    Returns:
        The batches returned by ``engine.predict`` stacked with ``np.vstack``
        (no argmax is applied).
    """
    df = train[0:10]
    device = "cuda" if torch.cuda.is_available() else "cpu"

    test_imgs = df.tar_path.values.tolist()
    test_aug = Utils.get_aug("test")
    # All-zero placeholder passed as ``targets``; no real labels are used here.
    test_tar = np.zeros((len(test_imgs), 2))
    test_dataset = ClassificationLoader(
        image_paths=test_imgs,
        targets=test_tar,
        resize=None,
        augmentations=test_aug,
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=bs_valid, num_workers=0, shuffle=False
    )

    model = get_model_effi_b4(2)
    tm = datetime.datetime.now().strftime("%m%d")
    model_save_path = (
        f"E:/kaggle_imgs/H2/saved_models/model_fold_{fold}_{tm}.bin"
    )
    # map_location lets a checkpoint saved from a CUDA model load on a
    # CPU-only host, which the device fallback above is meant to support.
    state_dict = torch.load(model_save_path, map_location="cpu")
    model.load_state_dict(state_dict)
    model = model.to(device)

    engine = Engine(model, None, device, classes=2, weights=None)
    preds = engine.predict(test_loader)
    preds = np.vstack(preds)

    # Export a TorchScript trace (intended for loading from C++).
    sample = torch.rand(1, 3, 224, 224)
    model.to("cpu")
    # Tracing records the ops of the current mode, so force inference mode
    # instead of relying on whatever state engine.predict left the model in.
    model.eval()
    # NOTE(review): presumably switches to the export-friendly swish
    # implementation (EfficientNet-PyTorch style API) — confirm.
    model.set_swish(False)
    traced_script_module = torch.jit.trace(model, sample)
    traced_script_module.save(
        f"E:/kaggle_imgs/H2/saved_models/traced_{model_name}_fold_{fold}_{tm}.pt"
    )
    return preds
def loop_train(fold, model_name, weights, sel_weight):
    """Train one fold of the binary classifier with early stopping.

    Rows of the module-level ``train`` frame with ``fold != fold`` are used
    for training and the remaining rows for validation. The target is the
    binary column ``result`` (``CAT > 0``). The state dict is written to
    ``model_fold_{fold}.bin`` whenever the validation loss improves, and
    training stops once it has failed to improve for more than
    ``early_stopping`` consecutive epochs.

    ``train``, ``history``, ``bs_train`` and ``bs_valid`` are read from
    module scope.

    Args:
        fold: Fold held out for validation.
        model_name: Not used by this function; kept so existing callers work.
        weights: Forwarded to ``Engine`` as ``weights``.
        sel_weight: Key passed to ``history.initial_info`` and
            ``history.add_train_info``.
    """
    history.initial_info(sel_weight)

    train_df = train[train.fold != fold].reset_index(drop=True)
    valid_df = train[train.fold == fold].reset_index(drop=True)
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same
    # dtype it used to alias.
    train_df["result"] = (train_df["CAT"] > 0).astype(int)
    valid_df["result"] = (valid_df["CAT"] > 0).astype(int)

    path = "E:/kaggle_imgs/Plant-pathology-2020/images_224_224/"

    imgs = train_df.image_id.values.tolist()
    train_imgs = [path + file + ".png" for file in imgs]
    train_aug = Utils.get_aug("train")
    train_tar = train_df.result.values
    train_dataset = ClassificationLoader(
        image_paths=train_imgs,
        targets=train_tar,
        resize=None,
        augmentations=train_aug,
    )
    # CutMix is disabled in this variant; the plain dataset is used directly.
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=bs_train, num_workers=4, shuffle=True
    )

    imgs = valid_df.image_id.values.tolist()
    valid_imgs = [path + file + ".png" for file in imgs]
    valid_aug = Utils.get_aug("valid")
    valid_tar = valid_df.result.values
    valid_dataset = ClassificationLoader(
        image_paths=valid_imgs,
        targets=valid_tar,
        resize=None,
        augmentations=valid_aug,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=bs_valid, num_workers=4, shuffle=False
    )

    # Model, optimizer, scheduler, engine
    model = get_effinet(classes=2)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, threshold=1e-5, mode="min", verbose=True
    )
    engine = Engine(model, optimizer, device, classes=2, weights=weights)

    best_loss = np.inf
    early_stopping = 3
    early_stopping_cnt = 0
    EPOCH = 300
    for epoch in range(EPOCH):
        train_loss, train_acc = engine.train(train_loader)
        valid_loss, valid_acc, valid_labels, valid_preds = engine.validate(
            valid_loader
        )
        scheduler.step(valid_loss)

        # Record this epoch's metrics.
        history.add_train_info(
            sel_weight,
            train_acc,
            train_loss,
            valid_acc,
            valid_loss,
            valid_labels,
            valid_preds,
        )
        tm = datetime.datetime.now().strftime("%H:%M:%S")
        print(f"{tm}, fold={fold}, epoch={epoch}, train_loss={train_loss:.4f}, valid_loss={valid_loss:.4f}, valid_acc={valid_acc:.4f}")

        if valid_loss < best_loss:
            best_loss = valid_loss
            torch.save(model.state_dict(), f"model_fold_{fold}.bin")
            early_stopping_cnt = 0
            history.best_idx[fold] = epoch
        else:
            early_stopping_cnt += 1
            if early_stopping_cnt > early_stopping:
                break

    print(f"fold={fold}, best val loss={best_loss}")
def loop_train(fold=0):
    """Train the 4-class CutMix model for one fold with early stopping.

    Rows of the module-level ``train`` frame whose ``fold`` differs from the
    argument form the training split; the matching rows form the validation
    split. Both splits use the ``CAT`` column as the target. The state dict
    is saved to ``model_fold_{fold}.bin`` each time the validation loss
    improves, and a progress line is printed only on those epochs.

    ``train``, ``bs_train`` and ``bs_valid`` are read from module scope.

    Args:
        fold: Fold held out for validation.
    """
    image_dir = "E:/kaggle_imgs/Plant-pathology-2020/images_224_224/"

    def _to_paths(frame):
        # Image files are named "<image_id>.png" inside image_dir.
        return [
            image_dir + image_id + ".png"
            for image_id in frame.image_id.values.tolist()
        ]

    ready_train_info(fold)

    is_holdout = train.fold == fold
    fit_df = train[~is_holdout].reset_index(drop=True)
    holdout_df = train[is_holdout].reset_index(drop=True)

    # Training pipeline: CutMix is applied to almost every sample.
    fit_paths = _to_paths(fit_df)
    fit_aug = Utils.get_aug("train")
    fit_dataset = ClassificationLoader(
        image_paths=fit_paths,
        targets=fit_df.CAT.values,
        resize=None,
        augmentations=fit_aug,
    )
    fit_cutmix = CutMix(
        fit_dataset, num_class=4, beta=1.0, prob=0.999, num_mix=1
    )
    train_loader = torch.utils.data.DataLoader(
        fit_cutmix, batch_size=bs_train, num_workers=4, shuffle=True
    )

    # Validation pipeline: same wrapper, but with prob=0.
    holdout_paths = _to_paths(holdout_df)
    holdout_aug = Utils.get_aug("valid")
    holdout_dataset = ClassificationLoader(
        image_paths=holdout_paths,
        targets=holdout_df.CAT.values,
        resize=None,
        augmentations=holdout_aug,
    )
    holdout_cutmix = CutMix(
        holdout_dataset, num_class=4, beta=1.0, prob=0, num_mix=1
    )
    valid_loader = torch.utils.data.DataLoader(
        holdout_cutmix, batch_size=bs_valid, num_workers=4, shuffle=False
    )

    # Model, optimizer, scheduler, engine
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = Utils.get_model("effinet").to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, threshold=1e-5, mode="min", verbose=True
    )
    engine = Engine(model, optimizer, device)

    max_epochs = 300
    patience = 7
    stale_epochs = 0
    best_loss = np.inf

    for epoch in range(max_epochs):
        train_loss, train_acc = engine.train(train_loader)
        valid_loss, valid_acc = engine.validate(valid_loader)
        scheduler.step(valid_loss)

        # Record this epoch's metrics.
        add_train_info(fold, train_acc, train_loss, valid_acc, valid_loss)

        if not valid_loss < best_loss:
            # No improvement: count it and stop once patience is exceeded.
            stale_epochs += 1
            if stale_epochs > patience:
                break
            continue

        best_loss = valid_loss
        torch.save(model.state_dict(), f"model_fold_{fold}.bin")
        tm = datetime.datetime.now().strftime("%H:%M:%S")
        print(
            f"{tm}, fold={fold}, epoch={epoch}, train_loss={train_loss:.6f}, valid_loss={valid_loss:.6f}"
        )
        stale_epochs = 0

    print(f"fold={fold}, best val loss={best_loss}")
def loop_train(fold, weights, sel_pos):
    """Train one fold of the binary classifier on a small subset.

    ``train_fold.csv`` is read from disk. At most 100 training rows and 80
    validation rows are used, and the loop runs for at most ``EPOCH`` (3)
    epochs. The target is the binary column ``result`` (``category > 0``).
    On each validation-loss improvement the state dict is saved to a path
    tagged with the fold and today's date (``%m%d``). Training stops after
    ``early_stopping`` consecutive epochs without improvement.

    ``history``, ``bs_train`` and ``bs_valid`` are read from module scope.

    Args:
        fold: Fold held out for validation.
        weights: Forwarded to ``Engine`` as ``weights``.
        sel_pos: Key passed to ``history.initial_info`` and
            ``history.add_train_info``.
    """
    train = pd.read_csv("E:/kaggle_imgs/H2/data/train_fold.csv")
    history.initial_info(sel_pos)

    # ``.copy()`` makes the slices independent frames so that adding the
    # ``result`` column below does not raise SettingWithCopyWarning.
    train_df = train[train.fold != fold].reset_index(drop=True)[:100].copy()
    valid_df = train[train.fold == fold].reset_index(drop=True)[:80].copy()
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same
    # dtype it used to alias.
    train_df["result"] = (train_df["category"] > 0).astype(int)
    valid_df["result"] = (valid_df["category"] > 0).astype(int)

    train_imgs = train_df.tar_path.values.tolist()
    train_aug = Utils.get_aug("train")
    train_tar = train_df.result.values
    train_dataset = ClassificationLoader(
        image_paths=train_imgs,
        targets=train_tar,
        resize=None,
        augmentations=train_aug,
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=bs_train, num_workers=0, shuffle=True
    )

    valid_imgs = valid_df.tar_path.values.tolist()
    valid_aug = Utils.get_aug("valid")
    valid_tar = valid_df.result.values
    valid_dataset = ClassificationLoader(
        image_paths=valid_imgs,
        targets=valid_tar,
        resize=None,
        augmentations=valid_aug,
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=bs_valid, num_workers=0, shuffle=False
    )

    model = get_model_effi_b4(classes=2)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, threshold=1e-5, mode="min", verbose=True
    )
    # NOTE(review): the model is built with classes=2 but Engine is given
    # classes=1 — confirm this mismatch is intentional.
    engine = Engine(model, optimizer, device, classes=1, weights=weights)

    best_loss = np.inf
    early_stopping = 3
    early_stopping_cnt = 0
    EPOCH = 3  # 300
    for epoch in range(EPOCH):
        train_loss, train_acc, train_labels, train_preds = engine.train(
            train_loader
        )
        valid_loss, valid_acc, valid_labels, valid_preds = engine.validate(
            valid_loader
        )
        scheduler.step(valid_loss)

        # Record this epoch's metrics.
        history.add_train_info(
            sel_pos,
            train_acc,
            train_loss,
            train_labels,
            train_preds,
            valid_acc,
            valid_loss,
            valid_labels,
            valid_preds,
        )
        clock = datetime.datetime.now().strftime("%H:%M:%S")
        print(
            f"{clock}, fold={fold}, epoch={epoch}, train_loss={train_loss:.4f}, valid_loss={valid_loss:.4f}, valid_acc={valid_acc:.4f}"
        )

        if valid_loss < best_loss:
            best_loss = valid_loss
            # The date tag must match the one used when loading the model.
            date_tag = datetime.datetime.now().strftime("%m%d")
            torch.save(
                model.state_dict(),
                f"E:/kaggle_imgs/H2/saved_models/model_fold_{fold}_{date_tag}.bin",
            )
            early_stopping_cnt = 0
        else:
            early_stopping_cnt += 1
            if early_stopping_cnt >= early_stopping:
                break

    print(f"fold={fold}, best val loss={best_loss}")