def run_training(fold, params, save_model=False):
    """Train the MoA model on one CV fold and return its best validation loss.

    Args:
        fold: kfold index held out for validation.
        params: dict with 'num_layers', 'hidden_size', 'dropout',
            'learning_rate' (typically supplied by an Optuna trial).
        save_model: when True, checkpoint the best weights to model_{fold}.bin.

    Returns:
        The lowest validation loss observed across epochs.
    """
    df = pd.read_csv('../data/train_features.csv')
    df = df.drop(['cp_type', 'cp_time', 'cp_dose'], axis=1)
    targets_df = pd.read_csv('../data/train_target_folds.csv')

    features = df.drop('sig_id', axis=1).columns
    target_columns = targets_df.drop(['sig_id', 'kfold'], axis=1).columns
    df = df.merge(targets_df, on='sig_id', how='left')

    train_df = df[df.kfold != fold].reset_index(drop=True)
    # BUG FIX: validation must use the held-out fold (== fold). The original
    # used `!= fold`, which validated on the training data itself, making the
    # reported validation loss (and early stopping) meaningless.
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[features].to_numpy()
    ytrain = train_df[target_columns].to_numpy()
    xvalid = valid_df[features].to_numpy()
    yvalid = valid_df[target_columns].to_numpy()

    train_dataset = utils.MoaDataset(features=xtrain, targets=ytrain)
    valid_dataset = utils.MoaDataset(features=xvalid, targets=yvalid)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=1024, num_workers=8, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=1024, num_workers=8)

    model = utils.Model(n_features=xtrain.shape[1],
                        n_targets=ytrain.shape[1],
                        n_layers=params['num_layers'],
                        hidden_size=params['hidden_size'],
                        dropout=params['dropout'])
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=params['learning_rate'])
    eng = utils.Engine(model, optimizer, device=DEVICE)

    # BUG FIX: np.Inf was removed in NumPy 2.0; np.inf is the supported name.
    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0
    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f'{fold}, {epoch}, {train_loss}, {valid_loss}')
        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f'model_{fold}.bin')
        else:
            # NOTE(review): the counter is never reset on improvement, so this
            # is a budget of 10 non-improving epochs in total, not consecutive.
            # Kept as-is since every variant in this file behaves the same way.
            early_stopping_counter += 1
        if early_stopping_counter > early_stopping_iter:
            break
    return best_loss
def run_training(fold, params, save_model=False):
    """Fit the MoA network on every fold except `fold`, validate on `fold`,
    and return the best validation loss (early stop after 10 stale epochs).
    """
    # Feature matrix: the categorical treatment columns are dropped up front.
    features_df = pd.read_csv("../Data/lish-moa/train_features.csv")
    features_df = features_df.drop(["cp_type", "cp_time", "cp_dose"], axis=1)
    folds_df = pd.read_csv(
        "/home/self-made-lol/Desktop/Mechanism_of_Actions/Data/lish-moa/train_tragets_fold.csv"
    )

    feature_cols = features_df.drop("sig_id", axis=1).columns
    target_cols = folds_df.drop(["sig_id", "kfold"], axis=1).columns
    merged = features_df.merge(folds_df, on='sig_id', how='left')

    train_split = merged[merged.kfold != fold].reset_index(drop=True)
    valid_split = merged[merged.kfold == fold].reset_index(drop=True)

    x_train = train_split[feature_cols].to_numpy()
    y_train = train_split[target_cols].to_numpy()
    x_valid = valid_split[feature_cols].to_numpy()
    y_valid = valid_split[target_cols].to_numpy()

    train_loader = torch.utils.data.DataLoader(
        utils.MoaDataset(features=x_train, targets=y_train),
        batch_size=1024, num_workers=8, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(
        utils.MoaDataset(features=x_valid, targets=y_valid),
        batch_size=1024, num_workers=8)

    model = utils.Model(
        nfeatures=x_train.shape[1],
        ntargets=y_train.shape[1],
        nlayers=params["num_layers"],
        hidden_size=params["hidden_size"],
        dropout=params["dropout"],
    )
    model.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=params["learning_rate"])
    eng = utils.Engine(model, optimizer, device=DEVICE)

    best_loss = np.inf
    patience = 10
    stale_epochs = 0
    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f"{fold}, {epoch}, {train_loss}, {valid_loss}")
        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f"model_{fold}.bin")
        else:
            stale_epochs += 1
        if stale_epochs > patience:
            break
    return best_loss
def run_training(fold, save_model=False):
    """Train one CV fold with fixed hyperparameters and return the best
    validation loss.

    Args:
        fold: kfold index held out for validation.
        save_model: when True, checkpoint the best weights to model_{fold}.bin.

    Returns:
        The lowest validation loss observed across epochs.
    """
    df = pd.read_csv("./input/train_features.csv")
    df = df.drop(["cp_time", "cp_dose", "cp_type"], axis=1)
    targets_df = pd.read_csv("./input/train_targets_folds.csv")

    feature_columns = df.drop("sig_id", axis=1).columns
    # BUG FIX: 'kfold' must be excluded from the targets. The original only
    # dropped 'sig_id', so the fold index column (used for splitting below)
    # was silently included as an extra training target.
    target_columns = targets_df.drop(["sig_id", "kfold"], axis=1).columns
    df = df.merge(targets_df, on="sig_id", how="left")

    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[feature_columns].to_numpy()
    ytrain = train_df[target_columns].to_numpy()
    xvalid = valid_df[feature_columns].to_numpy()
    yvalid = valid_df[target_columns].to_numpy()

    train_dataset = utils.MOADataset(features=xtrain, targets=ytrain)
    valid_dataset = utils.MOADataset(features=xvalid, targets=yvalid)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=1024, num_workers=8, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=1024, num_workers=8)

    model = utils.Model(
        nfeatures=xtrain.shape[1],
        ntargets=ytrain.shape[1],
        nlayers=2,
        hidden_size=128,
        dropout=0.3,
    )
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    eng = utils.Engine(model, optimizer, device=DEVICE)

    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0
    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f"{fold}, {epoch}, {train_loss}, {valid_loss}")
        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f"model_{fold}.bin")
        else:
            early_stopping_counter += 1
        if early_stopping_counter > early_stopping_iter:
            break
    # BUG FIX: the original fell off the end and implicitly returned None,
    # which breaks any caller (e.g. an Optuna objective) that expects the loss.
    return best_loss
def run_training(fold, params, save_model=False):
    """Train a BaseLine model on one CV fold and return its best validation loss.

    Args:
        fold: kfold index held out for validation.
        params: hyperparameter dict; 'learning_rate' is read here, the rest is
            forwarded to models.BaseLine.
        save_model: when True, save the final weights under weight/model<ts>/.

    Returns:
        The lowest validation loss observed across epochs.
    """
    df = pd.read_csv("input/folds/train.csv")
    with open("input/folds/targets", "r") as f:
        targets = f.read().split("\n")
    with open("input/folds/features", "r") as f:
        features = f.read().split("\n")
    print(f"\n[Fold No.{fold:>2}]\n")

    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)
    x_tr = train_df[features].to_numpy()
    x_va = valid_df[features].to_numpy()
    y_tr = train_df[targets].to_numpy()
    y_va = valid_df[targets].to_numpy()

    # TODO: [BEGIN] add non-NN model training here
    dataset_tr = utils.MoaDataset(x_tr, y_tr)
    # NOTE(review): the training loader has no shuffle=True, unlike the other
    # variants in this file — confirm whether that is intentional.
    loader_tr = torch.utils.data.DataLoader(dataset_tr, batch_size=1024,
                                            num_workers=2)
    dataset_va = utils.MoaDataset(x_va, y_va)
    loader_va = torch.utils.data.DataLoader(dataset_va, batch_size=1024,
                                            num_workers=2)

    model = models.BaseLine(num_features=x_tr.shape[1],
                            num_targets=y_tr.shape[1],
                            params=params)
    model.to(DEVICE)

    # TODO: let Optuna tune the optimizer and scheduler as well
    optimizer = torch.optim.Adam(model.parameters(), lr=params["learning_rate"])
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, threshold=0.00001, mode="min", verbose=True)
    eng = utils.Engine(model, optimizer, device=DEVICE)

    # Free RAM space as much as possible before training
    del df, train_df, valid_df, x_tr, x_va, y_tr, y_va
    gc.collect()
    # TODO: [END] add non-NN model training here

    loss_best = np.inf
    patience = 10
    patience_cnt = 0
    for ep in range(EPOCHS):
        loss_tr = eng.train(loader_tr)
        loss_va = eng.validate(loader_va)
        scheduler.step(loss_va)
        print(f"epoch:{ep:>2}, train:{loss_tr:>.5}, valid:{loss_va:>.5}")
        if loss_va < loss_best:
            loss_best = loss_va
            if save_model:
                pass
        else:
            patience_cnt += 1
        if patience_cnt > patience:
            break

    print(f"[Fold No.{fold:>2}]")
    print(f"epoch:{ep:>3}, train:{loss_tr:>.5}, valid:{loss_va:>.5}")
    if save_model:
        import os  # local import: the file's import header is outside this view
        now = datetime.now()
        now = str(now)[5:17].replace(" ", "_").replace(":", "")
        filename = f"weight/model{now}/fold{fold}.pt"
        # BUG FIX: torch.save does not create intermediate directories, so
        # saving into the fresh per-run directory raised FileNotFoundError.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        # NOTE(review): model.model suggests BaseLine wraps an inner module —
        # confirm this is the state_dict callers expect to load.
        torch.save(model.model.state_dict(), filename)
        print("model saved at:", filename)
    return loss_best
def run_training(fold, save_model=False):
    """Train one CV fold with fixed hyperparameters and return the best
    validation loss.

    Args:
        fold: kfold index held out for validation.
        save_model: when True, checkpoint the best weights to model_{fold}.bin.

    Returns:
        The lowest validation loss observed across epochs.
    """
    df = pd.read_csv(
        "/home/hasan/Data Set/Drug Classification/train_features.csv")
    df = df.drop(['cp_type', 'cp_time', 'cp_dose'], axis=1)
    targets_df = pd.read_csv("/home/hasan/spyder_code/train_targets_folds.csv")

    feature_columns = df.drop('sig_id', axis=1).columns
    target_columns = targets_df.drop(['sig_id', 'kfold'], axis=1).columns
    df = df.merge(targets_df, on='sig_id', how='left')

    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[feature_columns].to_numpy()
    ytrain = train_df[target_columns].to_numpy()
    xvalid = valid_df[feature_columns].to_numpy()
    yvalid = valid_df[target_columns].to_numpy()

    train_dataset = utils.MoaDataset(features=xtrain, targets=ytrain)
    valid_dataset = utils.MoaDataset(features=xvalid, targets=yvalid)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=1024, num_workers=8, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=1024, num_workers=8)

    model = utils.Model(nfeatures=xtrain.shape[1],
                        ntargets=ytrain.shape[1],
                        nlayers=2,
                        hidden_size=128,
                        dropout=0.3)
    # BUG FIX: this call was commented out, leaving the model on CPU while
    # Engine moves batches to DEVICE — a device-mismatch crash when DEVICE
    # is cuda. Re-enabled for consistency with the other variants.
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    eng = utils.Engine(model, optimizer, DEVICE)

    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0
    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f"{fold}, {epoch}, {train_loss}, {valid_loss}")
        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f'model_{fold}.bin')
        else:
            early_stopping_counter += 1
        if early_stopping_counter > early_stopping_iter:
            break
    # BUG FIX: the original returned None implicitly; return the best loss
    # like the other variants so callers can compare folds.
    return best_loss
def run_training():
    """Train the model on a random 70/30 split of the scored targets and
    return the best validation loss.

    Reads PATH, BATCH_SIZE, EPOCHS, num_layers, and hidden_size from module
    scope; checkpoints the best model each time validation loss improves.
    """
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    df_train = pd.read_csv(PATH + 'train_features.csv')
    targets = pd.read_csv(PATH + 'train_targets_scored.csv')
    utils.get_dummies(df_train, ['cp_type', 'cp_dose', 'cp_time'])
    # (removed unused local `sig_ids`; the id column is only dropped below)
    df_train.drop('sig_id', axis=1, inplace=True)
    targets.drop('sig_id', axis=1, inplace=True)

    # TODO use unscored data for training as well
    X_train, X_val, y_train, y_val = train_test_split(
        df_train.values, targets.values, test_size=0.3, random_state=42)

    train_dataset = utils.ModelDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, num_workers=4)
    val_dataset = utils.ModelDataset(X_val, y_val)
    val_loader = DataLoader(val_dataset, batch_size=1)

    model = utils.Model(X_train.shape[1], y_train.shape[1],
                        num_layers, hidden_size)
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30,
                                                gamma=0.1)
    engine = utils.Engine(model, optimizer, device=DEVICE)

    best_loss = np.inf
    early_stopping = 10
    early_stopping_counter = 0
    # TODO use optuna for trials
    for epoch in range(EPOCHS):
        train_loss = engine.train(train_loader)
        val_loss = engine.validate(val_loader)
        # BUG FIX: StepLR.step() takes no metric. The original passed
        # val_loss, which StepLR interpreted as the (deprecated) epoch
        # argument, corrupting the decay schedule. If metric-driven decay
        # was intended, switch the scheduler to ReduceLROnPlateau instead.
        scheduler.step()
        print(f'Epoch {epoch}, train_loss {train_loss}, val_loss {val_loss}')
        if val_loss < best_loss:
            best_loss = val_loss
            # TODO(review): '/models' is a root-level path (likely a
            # directory) — confirm the intended checkpoint file path.
            torch.save(model.state_dict(), '/models')
        else:
            early_stopping_counter += 1
        if early_stopping_counter > early_stopping:
            break
    print(f'best loss {best_loss}')
    return best_loss