import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# Project-specific components used below (self_defined_dataset, fine_clustering_dataset,
# ResNet, DenseNet, DeepLabv3_plus, BagNet, load_ckpt/save_ckpt, train/valid/test,
# cluster_initialization, store_config) are assumed to be imported elsewhere in this module.


def main(config, resume):
    # Dataset
    fine_dataset = self_defined_dataset(config)

    # Dataloader
    train_loader = DataLoader(fine_dataset,
                              shuffle=True,
                              batch_size=config['batch_size'],
                              num_workers=8)
    val_loader = DataLoader(fine_dataset,
                            shuffle=False,
                            batch_size=config['batch_size'],
                            num_workers=8)
    test_loader = DataLoader(fine_dataset,
                             shuffle=False,
                             batch_size=config['batch_size'],
                             num_workers=8)

    # Model
    start_epoch = 0
    if config['model_name'].startswith('resnet'):
        model = ResNet(config)
    elif config['model_name'].startswith('densenet'):
        model = DenseNet(config)
    else:
        raise ValueError("Unsupported model_name: {}".format(config['model_name']))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config['learning_rate'],
                                 weight_decay=1e-5)

    # Resume from a pretrained checkpoint
    if resume:
        filepath = config['pretrain_path']
        start_epoch, learning_rate, optimizer = load_ckpt(model, filepath)
        start_epoch += 1

    # Multi-GPU
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model.to(device)

    # Resume or not
    if start_epoch == 0:
        print("Brand New Training")
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=config['switch_learning_rate_interval'])
    if not resume:
        learning_rate = config['learning_rate']

    # Training
    if config['if_train']:
        for epoch in range(start_epoch + 1,
                           start_epoch + config['num_epoch'] + 1):
            # if training, delete learning rate and add optimizer
            loss_tr = train(train_loader, model, optimizer, epoch, config)
            if config['if_valid'] and epoch % config['valid_epoch_interval'] == 0:
                with torch.no_grad():
                    loss_val = valid(val_loader, model, epoch, config)
                scheduler.step(loss_val)
                save_ckpt(model, optimizer, epoch, loss_tr, loss_val, config)

    test(test_loader, model, config)
    # store_config(config)
    print("Training finished ...")
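
# Hypothetical checkpoint helpers (illustrative sketch, not the project's actual
# implementation). They mirror the call sites in main() above: load_ckpt(model, filepath)
# returning (epoch, learning_rate, optimizer), and save_ckpt(model, optimizer, epoch,
# loss_tr, loss_val, config). The 'ckpt_dir' config key and the stored field names are
# assumptions made for this sketch.
def save_ckpt_sketch(model, optimizer, epoch, loss_tr, loss_val, config):
    # Unwrap DataParallel so the saved keys match a bare model on reload.
    model_to_save = model.module if isinstance(model, nn.DataParallel) else model
    state = {
        'epoch': epoch,
        'learning_rate': optimizer.param_groups[0]['lr'],
        'model_state': model_to_save.state_dict(),
        'optimizer_state': optimizer.state_dict(),
        'loss_tr': loss_tr,
        'loss_val': loss_val,
    }
    torch.save(state, "{}/ckpt_epoch_{}.pth".format(config['ckpt_dir'], epoch))


def load_ckpt_sketch(model, filepath):
    # Restore model weights and rebuild an optimizer carrying the saved state.
    state = torch.load(filepath, map_location='cpu')
    model.load_state_dict(state['model_state'])
    optimizer = torch.optim.Adam(model.parameters(), lr=state['learning_rate'])
    optimizer.load_state_dict(state['optimizer_state'])
    return state['epoch'], state['learning_rate'], optimizer
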
def main(config, resume, phase):
    # Dataset
    fine_dataset = fine_clustering_dataset(config)

    # Dataloader
    train_loader = DataLoader(fine_dataset,
                              shuffle=True,
                              batch_size=config['batch_size'],
                              num_workers=32)
    val_loader = DataLoader(fine_dataset,
                            shuffle=False,
                            batch_size=config['batch_size'],
                            num_workers=32)
    test_loader = DataLoader(fine_dataset,
                             shuffle=False,
                             batch_size=config['batch_size'],
                             num_workers=32)

    # Model
    start_epoch = 0
    if config['model_name'].startswith('resnet'):
        model = ResNet(config)
    elif config['model_name'].startswith('densenet'):
        model = DenseNet(config)
    elif config['model_name'].startswith('deeplab'):
        cluster_vector_dim = config['cluster_vector_dim']
        model = DeepLabv3_plus(nInputChannels=3,
                               n_classes=3,
                               os=16,
                               cluster_vector_dim=cluster_vector_dim,
                               pretrained=True,
                               _print=True)
    elif config['model_name'].startswith('bagnet'):
        model = BagNet(config=config)
    else:
        raise ValueError("Unsupported model_name: {}".format(config['model_name']))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Optimizer (created before the resume step so that an optimizer restored from a
    # checkpoint is not silently overwritten)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config['learning_rate'],
                                 weight_decay=1e-5)

    # Resume from a pretrained checkpoint
    if resume:
        filepath = config['pretrain_path']
        start_epoch, learning_rate, optimizer, M, s = load_ckpt(model, filepath)
        start_epoch += 1

    # Multi-GPU
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model.to(device)

    # Resume or not
    if start_epoch == 0:
        print("Brand New Training")
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=config['switch_learning_rate_interval'])
    # log_dir = config['log_dir'] + "/{}_{}_".format(config['date'], config['model_name']) \
    #     + "ep_{}-{}_lr_{}".format(start_epoch, start_epoch + config['num_epoch'], config['learning_rate'])
    # best loss
    if not resume:
        learning_rate = config['learning_rate']
        M, s = cluster_initialization(train_loader, model, config, phase)
    print(start_epoch)

    # Training
    if config['if_train']:
        for epoch in range(start_epoch + 1,
                           start_epoch + config['num_epoch'] + 1):
            # if training, delete learning rate and add optimizer
            loss_tr = train(train_loader, model, optimizer, epoch, config, M, s)
            if config['if_valid'] and epoch % config['valid_epoch_interval'] == 0:
                with torch.no_grad():
                    loss_val, M, s = valid(val_loader, model, epoch, config,
                                           learning_rate, M, s, phase)
                scheduler.step(loss_val)
                save_ckpt(model, optimizer, epoch, loss_tr, loss_val, config, M, s)
            else:
                val_log = open("../log/val_" + config['date'] + ".txt", "a")
                val_log.write('epoch ' + str(epoch) + '\n')
                val_log.close()

    test(test_loader, model, config, M, phase)
    store_config(config, phase)
    print("Training finished ...")
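
# Hypothetical entry point (illustrative sketch): how the clustering variant of main()
# defined last above might be invoked. Every value in this config dict is a placeholder,
# the 'train' phase string is an assumption, and the dataset/model/helper functions may
# require additional config keys that this minimal dict does not show.
if __name__ == "__main__":
    example_config = {
        'model_name': 'resnet18',
        'batch_size': 16,
        'learning_rate': 1e-4,
        'num_epoch': 50,
        'switch_learning_rate_interval': 5,
        'valid_epoch_interval': 1,
        'if_train': True,
        'if_valid': True,
        'pretrain_path': './checkpoints/pretrained.pth',
        'cluster_vector_dim': 32,
        'date': '2020-01-01',
        'log_dir': '../log',
    }
    main(example_config, resume=False, phase='train')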