def regression(variant='1_hour', structureless=False, batch_size=8, x_size=12,
               h_size=8, emo_size=8, top_sizes=(16, 16), p_drop=0.1, verbose=1,
               bi=True, deep=True, lr_tree=0.05, lr_top=0.01, decay_tree=0.003,
               decay_top=0.006, epochs=60, cuda_id=-1):
    data_dir = '../data/'
    out_dir = '../results/'
    graphs_dir = data_dir + 'graphs/'
    cascade_size_file = data_dir + 'cascade_size.csv'

    device = set_device(cuda_id)

    if structureless:
        x_size = x_size - 2

    train_ids = np.array([ID.split('_')[0] for ID in os.listdir(graphs_dir)
                          if variant in ID and 'test' not in ID])
    test_ids = np.unique([ID.split('_')[0] for ID in os.listdir(graphs_dir)
                          if variant + '_test' in ID])

    train_set = CascadeData(train_ids, graphs_dir, cascade_size_file,
                            variant=variant, structureless=structureless)
    test_set = CascadeData(test_ids, graphs_dir, cascade_size_file,
                           variant=variant, structureless=structureless, test=True)
    train_generator = DataLoader(train_set, collate_fn=cascade_batcher(device),
                                 batch_size=batch_size, num_workers=8)
    test_generator = DataLoader(test_set, collate_fn=cascade_batcher(device),
                                batch_size=batch_size, num_workers=8)

    deep_tree = DeepTreeLSTMRegressor(x_size, emo_size, h_size=h_size,
                                      top_sizes=top_sizes, bi=bi, deep=deep, pd=p_drop)

    criterion = nn.MSELoss().to(device)
    optimizer_tree = th.optim.SGD(deep_tree.bottom_net.parameters(),
                                  lr=lr_tree, weight_decay=decay_tree)
    optimizer_top = th.optim.SGD(deep_tree.top_net.parameters(),
                                 lr=lr_top, weight_decay=decay_top)
    scheduler_tree = th.optim.lr_scheduler.StepLR(optimizer_tree, step_size=10, gamma=0.8)
    scheduler_top = th.optim.lr_scheduler.StepLR(optimizer_top, step_size=10, gamma=0.8)

    callbacks = [EarlyStopping(patience=10),
                 ModelLogger(out_dir + 'models/'),
                 ExperimentLogger(out_dir, 'logs_regression.csv')]

    model_trainer = DeepTreeTrainer(deep_tree)
    model_trainer.compile(optimizer_tree, optimizer_top, criterion,
                          scheduler_tree=scheduler_tree, scheduler_top=scheduler_top,
                          callbacks=callbacks, metrics=['mae'])
    model_trainer.fit(train_generator, test_generator, epochs, cuda_id, verbose)

    return deep_tree
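# A hypothetical invocation of regression() above; it assumes the
# ../data/graphs/ directory and ../data/cascade_size.csv file exist as laid
# out in the function body, and cuda_id=-1 keeps training on the CPU.
trained_model = regression(variant='1_hour', epochs=60, cuda_id=-1)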
def run_training(fold: int):
    df = pd.read_csv(os.path.join(config.DATA_PATH, "train_folds.csv"))

    train_data_loader = get_train_data_loader(df, fold)
    valid_data_loader = get_valid_data_loader(df, fold)

    model = WheatModel(config.NUM_CLASSES)
    model.to(config.DEVICE)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=config.LR, momentum=0.9, weight_decay=0.0005)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", patience=5)

    es = EarlyStopping(patience=10, mode="max", verbose=True, path=f"checkpoint-f{fold}.pt")
    tb = SummaryWriter(comment=f"lr={config.LR}")

    print_freq = round(len(train_data_loader) / 4)
    for epoch in range(config.EPOCHS):
        loss = train_one_epoch(model, train_data_loader, optimizer, epoch, print_freq)
        tb.add_scalar('loss', loss, epoch)

        m_ap = evaluate(model, valid_data_loader, epoch)
        tb.add_scalar('mAP', m_ap, epoch)

        es(m_ap, model)
        if es.early_stop:
            print("Early stopping")
            break
        scheduler.step(m_ap)

    tb.close()
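# A minimal driver sketch for run_training() above. The fold count is an
# assumption (only train_folds.csv is known), so set N_FOLDS to match the
# split that produced the file.
N_FOLDS = 5  # hypothetical

if __name__ == "__main__":
    for fold in range(N_FOLDS):
        run_training(fold)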
        momentum=args.momentum)

logger.info('Number of model parameters: {:,}'.format(
    sum([p.data.nelement() for p in model.parameters()])))

trainer = Trainer(model, optimizer, watch=['acc'], val_watch=['acc'])

if args.is_train:
    logger.info("Train on {} samples, validate on {} samples".format(
        len(train_loader.dataset), len(val_loader.dataset)))

    start_epoch = 0
    if args.resume:
        start_epoch = load_checkpoint(args.ckpt_dir, model, optimizer)

    trainer.train(train_loader, val_loader,
                  start_epoch=start_epoch,
                  epochs=args.epochs,
                  callbacks=[
                      PlotCbk(model, args.plot_num_imgs, args.plot_freq, args.use_gpu),
                      TensorBoard(model, args.log_dir),
                      ModelCheckpoint(model, optimizer, args.ckpt_dir),
                      LearningRateScheduler(
                          ReduceLROnPlateau(optimizer, 'min'), 'val_loss'),
                      EarlyStopping(model, patience=args.patience)
                  ])
else:
    logger.info("Test on {} samples".format(len(test_loader)))
    load_checkpoint(args.ckpt_dir, model, best=True)
    trainer.test(test_loader, best=args.best)
    return loss, preds


def val_step(x, t):
    x = torch.LongTensor(x).to(device)
    t = torch.Tensor(t).to(device)

    model.eval()
    preds = model(x)
    loss = criterion(preds, t)

    return loss, preds


epochs = 1000
batch_size = 100
n_batches_train = x_train.shape[0] // batch_size
n_batches_val = x_val.shape[0] // batch_size

es = EarlyStopping(patience=5, verbose=1)

for epoch in range(epochs):
    train_loss = 0.
    train_acc = 0.
    val_loss = 0.
    val_acc = 0.

    x_, t_ = shuffle(x_train, t_train)

    for batch in range(n_batches_train):
        start = batch * batch_size
        end = start + batch_size
        loss, preds = train_step(x_[start:end], t_[start:end])
        train_loss += loss.item()
        train_acc += \
            accuracy_score(t_[start:end].tolist(),
    pin_memory=pin_memory)

# . . define the model
# model = NvidiaNetwork()
model = NetworkLight()

# . . create the trainer
trainer = Trainer(model, device)

# . . compile the trainer
# . . define the loss
criterion = nn.MSELoss()

# . . define the optimizer
optimparams = {'lr': learning_rate}

# . . define the callbacks
cb = [ReturnBestModel(), EarlyStopping(min_delta=min_delta, patience=patience)]

trainer.compile(optimizer='adam', callbacks=cb, **optimparams)

# . . train the network
train_loss, valid_loss = trainer.fit(trainloader, validloader, num_epochs=num_epochs)

# . . plot the loss
plt.plot(train_loss)
plt.plot(valid_loss)
plt.legend(['train_loss', 'valid_loss'])
plt.show()

# . . save the model
state = {
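    # (sketch) completing the truncated save dict above; the key names and
    # the output path are assumptions, not the original's
    'model_state_dict': model.state_dict(),
    'train_loss': train_loss,
    'valid_loss': valid_loss,
}
torch.save(state, 'model_checkpoint.pt')  # hypothetical output path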
model = arch.build_model()
optimizer = RMSprop(lr)  # Nadam(lr)

## metrics = [Recall(), Precision(), dice_coef, MeanIoU(num_classes=2)]
## model.compile(loss=dice_loss, optimizer=optimizer, metrics=metrics)
# metrics = [Recall(), Precision(), dice_coef, MeanIoU(num_classes=2), miou_coef]
# model.compile(loss=miou_loss, optimizer=optimizer, metrics=metrics)
## model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer=optimizer, metrics=metrics)

## Model
with CustomObjectScope({'dice_loss': dice_loss, 'dice_coef': dice_coef,
                        'miou_loss': miou_loss, 'miou_coef': miou_coef}):
    model = load_model(model_path)
print("model loaded successfully")

metrics = [Recall(), Precision(), dice_coef, MeanIoU(num_classes=2), miou_coef]
model.compile(loss=miou_loss, optimizer=optimizer, metrics=metrics)
print("model compiled successfully")

csv_logger = CSVLogger(f"{file_path}{model_name}_{batch_size}_{epochs}.csv", append=False)
checkpoint = ModelCheckpoint(model_path, verbose=1, save_best_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-6, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=False)
callbacks = [csv_logger, checkpoint, reduce_lr, early_stopping]

model.fit_generator(train_gen,
                    validation_data=valid_gen,
                    steps_per_epoch=train_steps,
                    validation_steps=valid_steps,
                    epochs=epochs,
                    callbacks=callbacks)

# !python3 resume_training.py
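# Note: fit_generator is deprecated in TF2-era Keras; model.fit accepts
# generators directly, so the call above can be written equivalently as:
#
#   model.fit(train_gen, validation_data=valid_gen,
#             steps_per_epoch=train_steps, validation_steps=valid_steps,
#             epochs=epochs, callbacks=callbacks)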
def main(args):
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info('loading embedding...')
    with open(config['model_parameters']['embedding'], 'rb') as f:
        embedding = pickle.load(f)
        config['model_parameters']['embedding'] = embedding.vectors

    logging.info('loading valid data...')
    with open(config['model_parameters']['valid'], 'rb') as f:
        config['model_parameters']['valid'] = pickle.load(f)

    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)

    if config['arch'] == 'ExampleNet':
        from predictors import ExamplePredictor
        PredictorClass = ExamplePredictor
        predictor = PredictorClass(
            metrics=[Recall(1), Recall(10)],
            batch_size=128,
            max_epochs=1000000,
            dropout_rate=0.2,
            learning_rate=1e-3,
            grad_accumulate_steps=1,
            loss='BCELoss',        # BCELoss, FocalLoss
            margin=0,
            threshold=None,
            similarity='MLP',      # inner_product, Cosine, MLP
            device=args.device,
            **config['model_parameters'])
    elif config['arch'] == 'RnnNet':
        from predictors import RnnPredictor
        PredictorClass = RnnPredictor
        predictor = PredictorClass(
            metrics=[Recall(1), Recall(10)],
            batch_size=512,
            max_epochs=1000000,
            dropout_rate=0.2,
            learning_rate=1e-3,
            grad_accumulate_steps=1,
            loss='FocalLoss',      # BCELoss, FocalLoss
            margin=0,
            threshold=None,
            similarity='Cosine',   # inner_product, Cosine, MLP
            device=args.device,
            **config['model_parameters'])
    elif config['arch'] == 'RnnAttentionNet':
        from predictors import RnnAttentionPredictor
        PredictorClass = RnnAttentionPredictor
        predictor = PredictorClass(
            metrics=[Recall(1), Recall(10)],
            batch_size=32,
            max_epochs=1000000,
            dropout_rate=0.2,
            learning_rate=1e-3,
            grad_accumulate_steps=1,
            loss='BCELoss',        # BCELoss, FocalLoss
            margin=0,
            threshold=None,
            similarity='MLP',      # inner_product, Cosine, MLP
            device=args.device,
            **config['model_parameters'])
    else:
        logging.warning('Unknown config["arch"] {}'.format(config['arch']))
        # without a recognized arch, `predictor` would be unbound below
        raise ValueError('Unknown config["arch"] {}'.format(config['arch']))

    if args.load is not None:
        predictor.load(args.load)

    # ModelCheckpoint(filepath, monitor='loss', verbose=0, mode='min')
    model_checkpoint = ModelCheckpoint(
        os.path.join(args.model_dir, 'model.pkl'),
        monitor='Recall@{}'.format(10), verbose=1, mode='all')
    metrics_logger = MetricsLogger(os.path.join(args.model_dir, 'log.json'))
    early_stopping = EarlyStopping(
        os.path.join(args.model_dir, 'model.pkl'),
        monitor='Recall@{}'.format(10), verbose=1, mode='max', patience=30)

    logging.info('start training!')
    predictor.fit_dataset(train, train.collate_fn,
                          [model_checkpoint, metrics_logger, early_stopping])
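# A hypothetical command-line entry point for main() above, mirroring the
# attributes it reads (args.model_dir, args.device, args.load); the flag
# names here are assumptions.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Train a retrieval predictor.')
    parser.add_argument('model_dir', type=str, help='directory containing config.json')
    parser.add_argument('--device', default=None, help='e.g. "cuda:0" or "cpu"')
    parser.add_argument('--load', default=None, help='path to a model.pkl to resume from')
    args = parser.parse_args()
    main(args)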
import torch
import torch.nn as nn
import torch_geometric as pyg

import datasets as d
import models
import torch_funcs
from callbacks import Checkpoints, EarlyStopping

trainSet = d.InMemMolecule(partition='train')
trainLoader = pyg.data.DataLoader(trainSet, shuffle=True, batch_size=64)

valSet = d.InMemMolecule(partition='dev')
valLoader = pyg.data.DataLoader(valSet, shuffle=True, batch_size=64)

model = models.Model5(input_dimension=15)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss = nn.MSELoss()

earlyStopping = EarlyStopping(patience=100)
checkpoints = Checkpoints(checkpoint_start=0)
callback_list = [earlyStopping, checkpoints]

history = torch_funcs.train_model(model, loss, optimizer, trainLoader, valLoader,
                                  n_epochs=200, callbacks=callback_list, output='long')
torch_funcs.plot_history(history, size=(7, 5))
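# All of the snippets above lean on some EarlyStopping implementation, each
# from a different library. For reference, a minimal self-contained version
# of the shared pattern; the interface here (construct with patience, call
# with the monitored value, read a boolean) is an assumption, not any one of
# those libraries' APIs.
class SimpleEarlyStopping:
    """Signal a stop when the monitored value fails to improve for `patience` epochs."""

    def __init__(self, patience=10, min_delta=0.0, mode='min'):
        self.patience = patience
        self.min_delta = min_delta
        self.sign = 1.0 if mode == 'min' else -1.0  # flip comparisons for 'max' metrics
        self.best = float('inf')
        self.wait = 0
        self.early_stop = False

    def __call__(self, value):
        score = self.sign * value
        if score < self.best - self.min_delta:
            self.best = score   # improvement: reset the counter
            self.wait = 0
        else:
            self.wait += 1      # no improvement this epoch
            self.early_stop = self.wait >= self.patience
        return self.early_stop

# usage, in the style of the second snippet:
#   es = SimpleEarlyStopping(patience=10, mode='max')
#   if es(m_ap): break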