def main(config):
    logger = config.get_logger('train')

    # setup data_loader instances for the CVPPP source domain
    data_loader_source = config.init_obj('data_loader_CVPPP', DataLoader)
    valid_data_loader_source = data_loader_source.split_validation()

    # setup data_loader instances for the KOMATSUNA target domain
    data_loader_target = config.init_obj('data_loader_KOMATSUNA', DataLoader)
    valid_data_loader_target = data_loader_target.split_validation()

    # build model architecture, then print to console
    model = config.init_obj('UNET_ADAPT_arch', module_arch)
    logger.info(model)

    # prepare for (multi-device) GPU training; device falls back to CPU if no GPU
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)

    # get function handles of loss and metrics
    loss_fn_class = getattr(module_loss, config['density_loss'])
    loss_fn_domain = getattr(module_loss, config['domain_loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler; delete every line containing
    # lr_scheduler to disable the scheduler
    # trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer_CVPPP', torch.optim, [
        {'params': model.upsample.parameters(), 'lr': 1e-3},
        {'params': model.downsample.parameters(), 'lr': 1e-3},
        {'params': model.adapt.parameters(), 'lr': 1e-4},
    ])
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler, optimizer)

    trainer = Trainer(model=model,
                      loss_fn_class=loss_fn_class,
                      loss_fn_domain=loss_fn_domain,
                      metric_ftns=metrics,
                      optimizer=optimizer,
                      config=config,
                      device=device,
                      data_loader_source=data_loader_source,
                      valid_data_loader_source=valid_data_loader_source,
                      data_loader_target=data_loader_target,
                      valid_data_loader_target=valid_data_loader_target,
                      lr_scheduler=lr_scheduler)
    trainer.train()
def main(config):
    logger = config.get_logger('train')

    # setup data_loader instances
    data_loader = config.init_obj('data_loader', module_data)
    valid_data_loader = data_loader.get_validation()

    # build model architecture, then print to console
    model = config.init_obj('arch', module_arch)
    logger.info(model)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # get function handles of loss and metrics
    criterion = getattr(module_loss, config['loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler; delete every line containing
    # lr_scheduler to disable the scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler, optimizer)

    trainer = Trainer(model, criterion, metrics, optimizer,
                      config=config,
                      device=device,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader,
                      lr_scheduler=lr_scheduler)
    trainer.train()
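# None of these snippets include `prepare_device` itself. Below is a minimal
# sketch, assuming the signature used above (the helper from the common
# pytorch-template layout, returning the primary device plus the list of GPU
# ids to hand to DataParallel); the exact warning messages vary per project.
import torch

def prepare_device(n_gpu_use):
    """Pick the training device and the GPU ids usable by DataParallel."""
    n_gpu = torch.cuda.device_count()
    if n_gpu_use > 0 and n_gpu == 0:
        print("Warning: no GPU available on this machine, training will run on CPU.")
        n_gpu_use = 0
    if n_gpu_use > n_gpu:
        print(f"Warning: {n_gpu_use} GPUs configured, but only {n_gpu} are available.")
        n_gpu_use = n_gpu
    device = torch.device('cuda:0' if n_gpu_use > 0 else 'cpu')
    list_ids = list(range(n_gpu_use))
    return device, list_ids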
def __init__(self, config):
    self.config = config
    self.logger = config.get_logger('demo')
    self.device, device_ids = prepare_device(self.logger, config['n_gpu'])
    torch.set_grad_enabled(False)

    self.model = config.init_obj('arch', module_arch)
    self.logger.info('Loading checkpoint: {} ...'.format(config.resume))
    if config['n_gpu'] > 0:
        checkpoint = torch.load(config.resume)
    else:
        checkpoint = torch.load(config.resume, map_location=torch.device('cpu'))
    state_dict = checkpoint['state_dict']
    if config['n_gpu'] > 1:
        self.model = torch.nn.DataParallel(self.model)
    self.model.load_state_dict(state_dict)
    self.model = self.model.to(self.device)
    self.model.eval()

    self.postprocessor = None
    if 'postprocessor' in config["tester"]:
        module_name = config["tester"]['postprocessor']['type']
        module_args = dict(config["tester"]['postprocessor']['args'])
        self.postprocessor = getattr(postps_crf, module_name)(**module_args)

    self.demo_dir = None
    self.classes = None
def __init__(self,
             config: ConfigParser,
             model: nn.Module,
             precision_threshold: float = 0.0,
             recall_threshold: float = 0.0,
             logger=None):
    self.config = config
    self.logger = logger if logger else config.get_logger('inference')
    self.p_threshold: float = precision_threshold
    self.r_threshold: float = recall_threshold
    self.device, self.device_ids = prepare_device(config['n_gpu'], self.logger)

    self.state_dicts = []
    checkpoints = [config.resume] if config.resume is not None else list(
        config.save_dir.glob('**/model_best.pth'))
    for checkpoint in checkpoints:
        self.logger.info(f'Loading checkpoint: {checkpoint} ...')
        state_dict = torch.load(checkpoint, map_location=self.device)['state_dict']
        # strip the 'module.' prefix that DataParallel adds to parameter names
        self.state_dicts.append(
            {k.replace('module.', ''): v for k, v in state_dict.items()})
    self.model = model
def train(config) -> None:
    setup_logging('train')
    logger = logging.getLogger()
    logger.info(f'Training: {config}')
    seed_everything(config['SEED'])

    # setup data_loader instances
    data_loader = eval(config["DATA_LOADER"]["TYPE"])(**config["DATA_LOADER"]["ARGS"])
    valid_data_loader = data_loader.split_validation()

    # build model architecture, then print to console
    model = create_model(config["MODEL"]["TYPE"])(**config["MODEL"]["ARGS"])
    logger.info(model)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['N_GPU'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # get function handles of loss and metrics
    criterion = eval(config['LOSS']).to(device)
    metrics = [eval(met) for met in config['METRICS']]

    # build optimizer and learning rate scheduler; delete every line containing
    # lr_scheduler to disable the scheduler
    optimizer = create_optimizer(config["OPTIMIZER"]["TYPE"])(
        **config["OPTIMIZER"]["ARGS"], model=model)
    lr_scheduler, num_epochs = create_scheduler(config["LR_SCHEDULER"]["TYPE"])(
        **config["LR_SCHEDULER"]["ARGS"], optimizer=optimizer)

    trainer = Trainer(model, criterion, metrics, optimizer,
                      config=config,
                      device=device,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader,
                      lr_scheduler=lr_scheduler)
    trainer.train()
def send_model2device(self, str_cuda_idxs):
    device, device_ids = prepare_device(str_cuda_idxs)
    if len(device_ids) > 1:
        self.multi_gpu = True
        self.model = torch.nn.DataParallel(self.model, device_ids=device_ids)
    self.model = self.model.to(device)
    return device
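# Here `prepare_device` takes a comma-separated string of CUDA indices rather
# than a device count. A hypothetical sketch of such a variant — the parsing
# rules are an assumption, not taken from the snippet above:
import torch

def prepare_device(str_cuda_idxs):
    # hypothetical: parse a string such as "0,2" into a list of device ids
    device_ids = [int(i) for i in str_cuda_idxs.split(',') if i.strip() != '']
    if not torch.cuda.is_available():
        device_ids = []
    device = torch.device(f'cuda:{device_ids[0]}' if device_ids else 'cpu')
    return device, device_ids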
def main(config):
    logger = config.get_logger('train')

    # setup data_loader instances
    data_loader = config.init_obj('data_loader', module_data)
    print("Training Size:", data_loader.dataset.data.shape)
    valid_data_loader = data_loader.split_validation()
    print("Validation Size:", valid_data_loader.dataset.data.shape)

    # build model architecture, then print to console
    model = config.init_obj('arch', module_arch)
    logger.info(model)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # get function handles of loss and metrics
    criterion = getattr(module_loss, config['loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler; delete every line containing
    # lr_scheduler to disable the scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler, optimizer)

    # construct faults given index and layer
    try:
        layers = config['fault']['layer'].split('_')
        fault_indices = config['fault']['index']
        if isinstance(fault_indices, str):
            fault_indices = [[int(c) for c in fault_index.split(',')]
                             for fault_index in fault_indices.split('_')]
        times = list(map(int, config['fault']['time'].split('_')))
        assert len(times) == len(layers) == len(fault_indices)
        faults = []
        for layer, fault_index, time in zip(layers, fault_indices, times):
            fault = Fault(fault_layer=layer, fault_index=fault_index, time=time)
            faults.append(fault)
    except Exception:  # no (or malformed) fault config: train fault-free
        faults = None

    trainer = FaultTrainer(model, criterion, metrics, optimizer,
                           config=config,
                           device=device,
                           data_loader=data_loader,
                           valid_data_loader=valid_data_loader,
                           lr_scheduler=lr_scheduler,
                           fault=faults)
    trainer.train()
def main(config):
    logger = get_logger(name=__name__,
                        log_dir=config.log_dir,
                        verbosity=config['trainer']['verbosity'])
    torch.backends.cudnn.benchmark = True
    if config['seed'] is not None:
        torch.manual_seed(config['seed'])
        torch.backends.cudnn.deterministic = True
        np.random.seed(config['seed'])
        random.seed(config['seed'])
        logger.warning('You seeded the training. '
                       'This turns on the CUDNN deterministic setting, '
                       'which can slow down your training. '
                       'You may see unexpected behavior when restarting '
                       'from checkpoints.')

    # setup data_loader instances
    data_loader_obj = config.init_obj('data_loader', module_data)
    data_loader = data_loader_obj.get_train_loader()
    valid_data_loader = data_loader_obj.get_valid_loader()

    # build model architecture, then print to console
    model = config.init_obj('arch', module_arch)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    logger.info(
        summary(model,
                input_size=[config['data_loader']['args']['batch_size']] +
                config['input_size'],
                verbose=0))
    logger.info('Trainable parameters: {}'.format(
        sum([p.numel() for p in trainable_params])))

    # get function handles of loss and metrics
    criterion = getattr(module_loss, config['loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer, learning rate scheduler
    optimizer = config.init_obj('optimizer', torch.optim, model.parameters())
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler, optimizer)

    trainer = Trainer(model, criterion, metrics, optimizer,
                      config=config,
                      device=device,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader,
                      lr_scheduler=lr_scheduler)
    trainer.train()
def __init__(self, model, metrics, optimizer, config, train_dataset):
    self.config = config
    cfg_trainer: dict = config['trainer']
    self.epochs: int = cfg_trainer['epochs']
    self.save_start_epoch: int = cfg_trainer.get('save_start_epoch', 1)
    self.logger = config.get_logger('trainer', cfg_trainer['verbosity'])

    # setup GPU device if available, move model into configured device
    self.device, device_ids = prepare_device(config['n_gpu'], self.logger)
    self.num_devices = max(len(device_ids), 1)
    self.model = model.to(self.device)
    if self.num_devices > 1:
        self.model = torch.nn.DataParallel(model, device_ids=device_ids)

    # split each optimization batch into gradient-accumulation steps so that
    # no device ever sees more than max_bpg (max batches per GPU) samples
    max_bpg = self.config['trainer']['max_bpg']
    self.batches_per_optim = cfg_trainer['batch_size']
    self.gradient_accumulation_steps = math.ceil(
        self.batches_per_optim / (max_bpg * self.num_devices))
    batches_per_step = min(self.batches_per_optim, max_bpg * self.num_devices)
    if self.gradient_accumulation_steps > 1:
        self.config['data_loaders']['valid']['args']['batch_size'] = batches_per_step
    self.batches_per_device = math.ceil(batches_per_step / self.num_devices)
    self.config['data_loaders']['train']['args']['batch_size'] = batches_per_step

    self.data_loader = self.config.init_obj('data_loaders.train',
                                            module_loader, train_dataset)
    self.total_step = len(self.data_loader) * self.epochs
    self.optimization_step_per_epoch = math.ceil(
        len(self.data_loader) / self.gradient_accumulation_steps)
    self.total_optimization_step = self.optimization_step_per_epoch * self.epochs

    self.metrics = metrics
    self.optimizer = optimizer

    # configuration to monitor model performance and save best
    self.monitor: str = cfg_trainer.get('monitor', 'off')
    if self.monitor == 'off':
        self.mnt_mode = 'off'
        self.mnt_best = 0
    else:
        self.mnt_mode, self.mnt_metric = self.monitor.split()
        assert self.mnt_mode in ['min', 'max']
        self.mnt_best = inf if self.mnt_mode == 'min' else -inf
        self.early_stop = cfg_trainer.get('early_stop', inf)

    self.start_epoch = 1
    if config.resume is not None:
        self._resume_checkpoint(config.resume)
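# The batch bookkeeping above follows directly from three numbers. A worked
# example with assumed values batch_size=64, max_bpg=8 and two GPUs:
import math

batch_size, max_bpg, num_devices = 64, 8, 2  # assumed example values
grad_accum_steps = math.ceil(batch_size / (max_bpg * num_devices))  # ceil(64/16) = 4
batches_per_step = min(batch_size, max_bpg * num_devices)           # 16 samples per forward pass
batches_per_device = math.ceil(batches_per_step / num_devices)      # 8 samples per GPU
# the optimizer steps once every 4 loader batches, so the effective
# optimization batch stays at 64 while no GPU ever sees more than 8 samples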
def main(config):
    logger = config.get_logger('train')

    # setup data_loader instances
    preprocessor = config.init_obj('preprocessor', module_preprocessor)
    train_dataset = config.init_obj('dataset', module_dataset, preprocessor,
                                    mode='xeno', vanilla=True)
    # test_dataset = config.init_obj('dataset', module_dataset, preprocessor,
    #                                mode='soundscape', vanilla=True)
    print("Done with datasets")
    train_data_loader = config.init_obj('data_loader', module_data, train_dataset)
    valid_data_loader = train_data_loader.split_validation()
    # test_data_loader = config.init_obj('data_loader', module_data, test_dataset)

    # build model architecture, then print to console
    if config['arch']['type'] == 'PretrainedModel':
        wrap = config.init_obj('arch', module_arch)
        model = wrap.get_model()
    else:
        model = config.init_obj('arch', module_arch)
    logger.info(model)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # get function handles of loss and metrics
    criterion = getattr(module_loss, config['loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler; delete every line containing
    # lr_scheduler to disable the scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler, optimizer)

    trainer = Trainer(model, criterion, metrics, optimizer,
                      config=config,
                      device=device,
                      data_loader=train_data_loader,
                      valid_data_loader=valid_data_loader,
                      lr_scheduler=lr_scheduler)
    trainer.train()
def main(config):
    logger = config.get_logger('train')

    # setup data_loader instances
    data_loader = config.init_obj('data_loader', module_data)
    valid_data_loader = data_loader.split_validation()

    # build model architecture, then print to console
    model = config.init_obj('arch', module_arch)
    logger.info(model)

    # load model state dict from the resume checkpoint
    state_dict = torch.load(config.resume)
    model.load_state_dict(state_dict)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # get function handles of loss and metrics
    criterion = getattr(module_loss, config['loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler; delete every line containing
    # lr_scheduler to disable the scheduler
    # freeze some layers for transfer learning:
    # for name, param in model.named_parameters():
    #     if not ('output' in name):
    #         param.requires_grad = False
    # pass only parameters with requires_grad to the optimizer
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler, optimizer)

    trainer = Trainer(model, criterion, metrics, optimizer,
                      config=config,
                      device=device,
                      data_loader=data_loader,
                      valid_data_loader=None,
                      lr_scheduler=lr_scheduler)
    trainer.train()
def __init__(self, model, criterion, metric_ftns, optimizer, config):
    self.config = config
    self.logger = config.get_logger('trainer', config['trainer']['verbosity'])

    if model is not None:
        # setup GPU device if available, move model into configured device
        self.device, device_ids = prepare_device(self.logger, config['n_gpu'])
        self.model = model.to(self.device)
        if len(device_ids) > 1:
            self.model = torch.nn.DataParallel(model, device_ids=device_ids)

    self.criterion = criterion
    self.metric_ftns = metric_ftns
    self.optimizer = optimizer

    cfg_trainer = config['trainer']
    self.epochs = cfg_trainer['epochs']
    self.save_period = cfg_trainer['save_period']
    self.monitor = cfg_trainer.get('monitor', 'off')

    # configuration to monitor model performance and save best
    if self.monitor == 'off':
        self.mnt_mode = 'off'
        self.mnt_best = 0
    else:
        self.mnt_mode, self.mnt_metric = self.monitor.split()
        assert self.mnt_mode in ['min', 'max']
        self.mnt_best = inf if self.mnt_mode == 'min' else -inf
        self.early_stop = cfg_trainer.get('early_stop', inf)

    self.start_epoch = 1
    self.checkpoint_dir = config.save_dir

    # setup visualization writer instance
    self.writer = TensorboardWriter(config.log_dir, self.logger,
                                    cfg_trainer['tensorboard'])

    if config.resume is not None:
        self._resume_checkpoint(config.resume)
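# The `monitor` string parsed above packs a mode and a metric name into one
# config field. A minimal example of the trainer config this constructor
# expects — field names mirror the code, the concrete values are illustrative:
cfg_trainer = {
    'epochs': 100,
    'save_period': 1,
    'verbosity': 2,
    'monitor': 'min val_loss',   # '<min|max> <metric name>', or 'off'
    'early_stop': 10,
    'tensorboard': True,
}
mode, metric = cfg_trainer['monitor'].split()  # -> ('min', 'val_loss')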
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=100, metavar='N',
                        help='input batch size for testing (default: 100)')
    parser.add_argument('--epochs', type=int, default=100000, metavar='N',
                        help='number of epochs to train (default: 100000)')
    parser.add_argument('--lr', type=float, default=0.0001, metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--cuda_dev', type=int, default=0,
                        help='select specific CUDA device for training')
    parser.add_argument('--n_gpu_use', type=int, default=1,
                        help='select number of CUDA devices for training')
    # parser.add_argument('--seed', type=int, default=1, metavar='S',
    #                     help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='logging training status cadence')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='for saving the current model')
    parser.add_argument('--tensorboard', action='store_true', default=True,
                        help='for logging the model in tensorboard')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if not use_cuda:
        args.n_gpu_use = 0
    device = utils.prepare_device(n_gpu_use=args.n_gpu_use, gpu_id=args.cuda_dev)
    # kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # torch.manual_seed(args.seed)
    # # fix random seeds for reproducibility
    # SEED = 123
    # torch.manual_seed(SEED)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(SEED)

    # configurations
    cfg = SemiSupLPGNNWrapper.Config()
    cfg.use_cuda = use_cuda
    cfg.device = device
    cfg.log_interval = args.log_interval
    cfg.tensorboard = args.tensorboard
    # cfg.batch_size = args.batch_size
    # cfg.test_batch_size = args.test_batch_size
    # cfg.momentum = args.momentum
    cfg.dataset_path = './data'
    cfg.epochs = 1000
    cfg.activation = nn.LeakyReLU()
    cfg.state_transition_hidden_dims = [150, ]
    cfg.output_function_hidden_dims = [30, ]
    # cfg.state_dim = [7, 2]
    cfg.state_dim = [350, 150]
    cfg.graph_based = False
    cfg.log_interval = 10
    cfg.lrw = 0.001
    cfg.lrx = 0.003
    cfg.lrλ = 0.003
    cfg.task_type = "semisupervised"
    # number of LPGNN layers is inferred from the state_dim list
    cfg.layers = len(cfg.state_dim) if type(cfg.state_dim) is list else 1

    # LPGNN
    cfg.eps = 1e-6
    cfg.state_constraint_function = "squared"
    cfg.loss_w = 0.0005

    # model creation - a unique model
    model = SemiSupLPGNNWrapper(cfg)

    # dataset creation
    dset = dataloader.get_dgl_cora(aggregation_type="sum", sparse_matrix=True)  # generate the dataset
    # dset = dataloader.get_dgl_citation(aggregation_type="sum")  # generate the dataset
    # dset = dataloader.get_dgl_karate(aggregation_type="sum")  # generate the dataset
    model(dset)  # dataset initialization into the GNN

    # training code
    for epoch in range(1, args.epochs + 1):
        model.global_step(epoch)
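# Unlike the pytorch-template helper, the `utils.prepare_device` used in these
# GNN scripts returns a single torch.device and additionally pins one specific
# GPU. A hypothetical sketch consistent with the call sites (n_gpu_use, gpu_id);
# the body is an assumption, not code from the project:
import torch

def prepare_device(n_gpu_use, gpu_id=0):
    # hypothetical single-device variant matching the call sites above
    if n_gpu_use > 0 and torch.cuda.is_available():
        return torch.device(f'cuda:{gpu_id}')
    return torch.device('cpu')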
if trainer_start_btn:
    logger = logging.getLogger()
    seed_everything(trainer_seed)
    logger.info(trainer)
    logger.info("seed: {}, save_dir: {}".format(str(trainer_seed),
                                                str(trainer_save_path)))

    # data
    data_loader = eval("data_module." + trainer_dataloader + "DataLoader")(
        **cfg["DATA_LOADER"]["ARGS"])
    valid_data_loader = data_loader.split_validation()

    # model
    model = eval("model_module." + trainer_model)(
        **sessions.trainer_params[trainer_id]["model_params"])
    # logger.info(model)

    # gpu
    device, device_ids = prepare_device(cfg['N_GPU'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # criterion
    criterion = eval("loss_module." + trainer_loss)

    # metrics
    metrics = [eval("metric_module." + met) for met in eval(trainer_metrics)]

    # optimizer
    optimizer = eval("optimizer_module." + trainer_optimizer)(
        **sessions.trainer_params[trainer_id]["optimizer_params"],
        params=model.parameters())
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch')
    parser.add_argument('--epochs', type=int, default=100000, metavar='N',
                        help='number of epochs to train (default: 100000)')
    parser.add_argument('--lr', type=float, default=0.0001, metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--cuda_dev', type=int, default=0,
                        help='select specific CUDA device for training')
    parser.add_argument('--n_gpu_use', type=int, default=1,
                        help='select number of CUDA devices for training')
    # parser.add_argument('--seed', type=int, default=1, metavar='S',
    #                     help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='logging training status cadence')
    # parser.add_argument('--save-model', action='store_true', default=False,
    #                     help='for saving the current model')
    parser.add_argument('--tensorboard', action='store_true', default=False,
                        help='for logging the model in tensorboard')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if not use_cuda:
        args.n_gpu_use = 0
    device = utils.prepare_device(n_gpu_use=args.n_gpu_use, gpu_id=args.cuda_dev)
    # kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # configurations
    cfg = LPGNNWrapper.Config()
    cfg.use_cuda = use_cuda
    cfg.device = device
    # cfg.seed = SEED
    cfg.log_interval = args.log_interval
    cfg.tensorboard = args.tensorboard
    cfg.dataset_path = './data'
    cfg.epochs = args.epochs
    cfg.activation = nn.Tanh()
    cfg.state_transition_hidden_dims = [10, ]
    cfg.output_function_hidden_dims = [3, ]
    # cfg.state_dim = [7, 2]
    cfg.state_dim = [5, ]
    cfg.graph_based = False
    cfg.log_interval = 300
    cfg.lrw = 0.01
    cfg.lrx = 0.03
    cfg.lrλ = 0.01
    cfg.task_type = "semisupervised"
    # number of LPGNN layers is inferred from the state_dim list
    cfg.layers = len(cfg.state_dim) if type(cfg.state_dim) is list else 1

    # LPGNN
    cfg.eps = 1e-6
    cfg.state_constraint_function = "eps"
    cfg.loss_w = 0.001

    # model creation - a unique model
    model = SemiSupLPGNNWrapper(cfg)

    # dataset creation
    # dset = dataloader.get_karate(aggregation_type="sum", sparse_matrix=True)  # generate the dataset
    # dset = dataloader.get_twochainsSSE(aggregation_type="sum", percentage=0.1, sparse_matrix=True)  # generate the dataset
    dset = dataloader.get_twochains(num_nodes_per_graph=1000, pct_labels=.2,
                                    pct_valid=.2, sparse_matrix=True)  # generate the dataset
    model(dset)  # dataset initialization into the GNN

    import time
    start_get = time.time()

    # training code
    for epoch in range(args.epochs):
        model.global_step(epoch, start_get)
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch')
    parser.add_argument('--epochs', type=int, default=10000, metavar='N',
                        help='number of epochs to train (default: 10000)')
    parser.add_argument('--lr', type=float, default=0.0001, metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--cuda_dev', type=int, default=0,
                        help='select specific CUDA device for training')
    parser.add_argument('--n_gpu_use', type=int, default=1,
                        help='select number of CUDA devices for training')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='logging training status cadence')
    parser.add_argument('--tensorboard', action='store_true', default=True,
                        help='for logging the model in tensorboard')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if not use_cuda:
        args.n_gpu_use = 0
    device = utils.prepare_device(n_gpu_use=args.n_gpu_use, gpu_id=args.cuda_dev)
    # kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # torch.manual_seed(args.seed)
    # # fix random seeds for reproducibility
    # SEED = 123
    # torch.manual_seed(SEED)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(SEED)

    # configurations
    cfg = GNNWrapper.Config()
    cfg.use_cuda = use_cuda
    cfg.device = device
    cfg.log_interval = args.log_interval
    cfg.tensorboard = args.tensorboard
    # cfg.batch_size = args.batch_size
    # cfg.test_batch_size = args.test_batch_size
    # cfg.momentum = args.momentum
    cfg.dataset_path = './data'
    cfg.epochs = args.epochs
    cfg.lrw = args.lr
    cfg.activation = nn.Sigmoid()
    cfg.state_transition_hidden_dims = [10, ]
    cfg.output_function_hidden_dims = [5]
    cfg.state_dim = 10
    # cfg.max_iterations = 50
    cfg.convergence_threshold = 0.01
    cfg.graph_based = False
    cfg.log_interval = 10
    cfg.lrw = 0.01
    cfg.task_type = "multiclass"

    # model creation
    # model_tr = GNNWrapper(cfg)
    # model_val = GNNWrapper(cfg)
    # model_tst = GNNWrapper(cfg)
    cfg.dset_name = "sub_30_15_200"
    cfg.aggregation_type = "degreenorm"

    # dataset creation
    dset = dataloader.get_subgraph(set=cfg.dset_name,
                                   aggregation_type=cfg.aggregation_type,
                                   sparse_matrix=True)  # generate the dataset
    cfg.label_dim = dset["train"].node_label_dim

    state_nets = [
        net.StateTransition(cfg.state_dim, cfg.label_dim,
                            mlp_hidden_dim=cfg.state_transition_hidden_dims,
                            activation_function=cfg.activation),
        net.GINTransition(cfg.state_dim, cfg.label_dim,
                          mlp_hidden_dim=cfg.state_transition_hidden_dims,
                          activation_function=cfg.activation),
        net.GINPreTransition(cfg.state_dim, cfg.label_dim,
                             mlp_hidden_dim=cfg.state_transition_hidden_dims,
                             activation_function=cfg.activation),
    ]
    lrs = [0.05, 0.01, 0.001]

    hyperparameters = dict(lr=lrs, state_net=state_nets)
    hyperparameters_values = [v for v in hyperparameters.values()]

    start_0 = time.time()
    for lr, state_net in product(*hyperparameters_values):
        cfg.lrw = lr
        cfg.state_net = state_net
        print(f"learning_rate:{lr}, state_dim:{cfg.state_dim}, "
              f"aggregation function:{str(state_net).split('(')[0]}")

        # model creation
        model_tr = GNNWrapper(cfg)
        model_val = GNNWrapper(cfg)
        model_tst = GNNWrapper(cfg)
        # 24.3.21 STOPPER
        early_stopper = utils.EarlyStopper(cfg)

        model_tr(dset["train"], state_net=state_net)  # dataset initialization into the GNN
        model_val(dset["validation"],
                  state_net=model_tr.gnn.state_transition_function,
                  out_net=model_tr.gnn.output_function)  # dataset initialization into the GNN
        model_tst(dset["test"],
                  state_net=model_tr.gnn.state_transition_function,
                  out_net=model_tr.gnn.output_function)  # dataset initialization into the GNN

        # training code
        start = time.time()
        for epoch in range(1, args.epochs + 1):
            acc_train = model_tr.train_step(epoch)
            if epoch % 10 == 0:
                acc_tst = model_tst.test_step(epoch)
                acc_val = model_val.valid_step(epoch)
                stp = early_stopper(acc_train, acc_val, acc_tst, epoch)
                # early_stopper signals stop by returning -1
                if stp == -1:
                    print(f"{early_stopper.best_epoch}, \t {early_stopper.best_train}, \t, "
                          f"{early_stopper.best_val}, \t {early_stopper.best_test}")
                    break
        # model_tst.test_step(epoch)
        time_sample = time.time() - start
        print(f"time taken for one set: {str(time_sample)} seconds")

    time_whole = time.time() - start_0
    print(f"time taken for the whole experiment: {str(time_whole)} seconds")
def send_model2device(self, str_cuda_idxs):
    device, device_ids = prepare_device(str_cuda_idxs)
    assert len(device_ids) <= 1, \
        "multi-gpu mode for computing embeddings is not supported now"
    self.model = self.model.to(device)
    return device
def main(config):
    # set random seed
    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    random.seed(config.seed)

    # prepare hardware acceleration
    device, gpu_device_ids = prepare_device(config.num_gpu)
    if "cuda" in str(device):
        cp.print_green(f"utilizing gpu devices : {gpu_device_ids}")
        torch.cuda.manual_seed(config.seed)

    # prepare model
    model_config = load_json(config.model_config)
    model_class = getattr(model_modules, model_config["model"])
    model = model_class(model_config["config"])

    trained_model = model_config["trained_model"]
    cp.print_green(f"pretrained model: {trained_model}")
    model.load_state_dict(torch.load(trained_model, map_location=device),
                          strict=False)
    cp.print_green("model:\n", model)

    activation = None
    if "activation" in model_config:
        activation = getattr(torch, model_config["activation"], None)
    cp.print_green("activation: ", type(activation).__name__)

    # prepare DataLoader
    data_config = load_json(config.data_config)
    data_loader_class = getattr(data_loader_modules, data_config["data_loader"])

    # prepare file handler
    output_path = config.output_folder + "/" + type(model).__name__
    cp.print_green(f"file type: {config.file_type}")
    cp.print_green(f"output folder: {output_path}")

    # prepare extraction
    if len(gpu_device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=gpu_device_ids)
    model.eval()
    model.to(device)

    # extract train features
    train_data_loader = data_loader_class(data_config["train_config"])
    train_file_handler = file_handler_modules.handler_mapping[config.file_type](
        output_path + "/train")
    meta = extract_feature(model, activation, train_data_loader,
                           train_file_handler, device)
    cp.print_green('train meta file:\n', meta)
    train_file_handler.generate_meta_file(meta)
    del train_file_handler, train_data_loader

    # extract test features
    test_data_loader = data_loader_class(data_config["test_config"])
    test_file_handler = file_handler_modules.handler_mapping[config.file_type](
        output_path + "/test")
    meta = extract_feature(model, activation, test_data_loader,
                           test_file_handler, device)
    cp.print_green('test meta file:\n', meta)
    test_file_handler.generate_meta_file(meta)
def main(args):
    if not check_exists(args.save_dir):
        os.makedirs(args.save_dir)

    dataset = IQiYiFineTuneSceneDataset(args.data_root, 'train+val-noise',
                                        image_root='/home/dcq/img')
    data_loader = DataLoader(dataset, batch_size=args.batch_size,
                             shuffle=True, num_workers=4)
    log_step = len(data_loader) // 10 if len(data_loader) > 10 else 1

    model = ArcFaceSEResNeXtModel(args.num_classes, include_top=True)
    metric_func = ArcMarginProduct()
    loss_func = FocalLoss(gamma=2.)

    # the pretrained backbone gets a 100x smaller learning rate than the head
    trainable_params = [
        {'params': model.base_model.parameters(), "lr": args.learning_rate / 100},
        {'params': model.weight},
    ]
    optimizer = optim.SGD(trainable_params, lr=args.learning_rate,
                          momentum=0.9, weight_decay=1e-5)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epoch)

    device, device_ids = prepare_device()
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    for epoch_idx in range(args.epoch):
        total_loss = .0
        for batch_idx, (images, labels, _) in enumerate(data_loader):
            # fold the three crops per sample into the batch dimension
            images = images.view(-1, *images.size()[-3:])
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            # average the predictions of the three crops per sample
            outputs = outputs.view(outputs.size(0) // 3, 3, -1)
            outputs = torch.mean(outputs, dim=1)
            outputs_metric = metric_func(outputs, labels)
            local_loss = loss_func(outputs_metric, labels)
            local_loss.backward()
            optimizer.step()

            total_loss += local_loss.item()
            if batch_idx % log_step == 0 and batch_idx != 0:
                print('Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}'.format(
                    epoch_idx, batch_idx * args.batch_size, len(dataset),
                    100.0 * batch_idx / len(data_loader), local_loss.item()))

        log = {
            'epoch': epoch_idx,
            'lr': optimizer.param_groups[0]['lr'],
            'loss': total_loss / len(data_loader),
        }
        for key, value in sorted(log.items(), key=lambda item: item[0]):
            print('    {:20s}: {:6f}'.format(str(key), value))
        lr_scheduler.step()

    save_model(model.module, args.save_dir, 'demo_arcface_fine_tune_model',
               args.epoch)
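# The optimizer above relies on PyTorch per-parameter groups: an 'lr' set
# inside a group overrides the optimizer-level default, so the backbone learns
# 100x slower than the head. A self-contained illustration with placeholder
# modules (the names `backbone`/`head` are toy examples, not project code):
import torch
from torch import nn, optim

backbone, head = nn.Linear(8, 8), nn.Linear(8, 2)    # placeholder modules
optimizer = optim.SGD(
    [{'params': backbone.parameters(), 'lr': 1e-4},  # overrides the default lr
     {'params': head.parameters()}],                 # falls back to lr=1e-2
    lr=1e-2, momentum=0.9)
print([g['lr'] for g in optimizer.param_groups])     # [0.0001, 0.01]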
def test(config):
    # setup GPU device if available, move model into configured device
    device, device_ids = prepare_device(config["n_gpu"])

    # datasets
    test_datasets = dict()
    keys = ["datasets", "test"]
    for name in get_by_path(config, keys):
        test_datasets[name] = config.init_obj([*keys, name])

    results = pd.DataFrame()
    k_fold = config["k_fold"]
    Cross_Valid.create_CV(k_fold)
    start = time.time()
    for k in range(k_fold):
        # data_loaders
        test_data_loaders = dict()
        keys = ["data_loaders", "test"]
        for name in get_by_path(config, keys):
            dataset = test_datasets[name]
            loaders = config.init_obj([*keys, name], dataset)
            test_data_loaders[name] = loaders.test_loader

        # models
        if k_fold > 1:
            fold_prefix = f"fold_{k}_"
            dirname = os.path.dirname(config.resume)
            basename = os.path.basename(config.resume)
            resume = os.path.join(dirname, fold_prefix + basename)
        else:
            resume = config.resume
        logger.info(f"Loading model: {resume} ...")
        checkpoint = torch.load(resume)
        models = dict()
        logger_model = get_logger("model", verbosity=0)
        for name in config["models"]:
            model = config.init_obj(["models", name])
            logger_model.info(model)
            state_dict = checkpoint["models"][name]
            if config["n_gpu"] > 1:
                model = torch.nn.DataParallel(model)
            model.load_state_dict(state_dict)
            model = model.to(device)
            model.eval()
            models[name] = model
        model = models["model"]

        # losses
        loss_fn = config.init_obj(["losses", "loss"])

        # metrics
        metrics_epoch = [getattr(module_metric, met)
                         for met in config["metrics"]["per_epoch"]]
        keys_epoch = [m.__name__ for m in metrics_epoch]
        test_metrics = MetricTracker([], keys_epoch)
        if "pick_threshold" in config["metrics"]:
            threshold = checkpoint["threshold"]
            setattr(module_metric, "THRESHOLD", threshold)
            logger.info(f"threshold: {threshold}")

        with torch.no_grad():
            print("testing...")
            test_loader = test_data_loaders["data"]
            if len(metrics_epoch) > 0:
                outputs = torch.FloatTensor().to(device)
                targets = torch.FloatTensor().to(device)
            for batch_idx, (data, target) in enumerate(test_loader):
                data, target = data.to(device), target.to(device)
                output = model(data)
                if len(metrics_epoch) > 0:
                    outputs = torch.cat((outputs, output))
                    targets = torch.cat((targets, target))
                #
                # save sample images, or do something with output here
                #
            for met in metrics_epoch:
                test_metrics.epoch_update(met.__name__, met(targets, outputs))
            test_log = test_metrics.result()

        test_log = test_log["mean"].rename(k)
        results = pd.concat((results, test_log), axis=1)
        logger.info(test_log)

        # cross validation
        if k_fold > 1:
            Cross_Valid.next_fold()

    msg = msg_box("result")
    end = time.time()
    total_time = consuming_time(start, end)
    msg += f"\nConsuming time: {total_time}."

    result = pd.DataFrame()
    result["mean"] = results.mean(axis=1)
    result["std"] = results.std(axis=1)
    msg += f"\n{result}"
    logger.info(msg)

    # bootstrap
    if config.test_args.bootstrapping:
        assert k_fold == 1, "k-fold ensemble and bootstrap are mutually exclusive."
        N = config.test_args.bootstrap_times
        bootstrapping(targets, outputs, metrics_epoch, test_metrics, repeat=N)
def main(config):
    logger = config.get_logger('trainer', config['trainer']['verbosity'])
    # print logged information to the screen
    pprint.pprint(config.config)

    trainDataLoader = config.init_obj('data_loader', dataset_classes)
    X, y = next(iter(trainDataLoader))
    print(X.shape)

    config_test = deepcopy(config)
    config_test.config['data_loader']['training'] = False
    valDataLoader = config_test.init_obj('data_loader', dataset_classes)

    model = config.init_obj('arch', model_classes)
    logger.info(model)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)
    logger.info(device)
    logger.info(device_ids)

    # get function handles of loss and metrics
    criterion = getattr(losses, config['loss'])
    logger.info(criterion)
    metrics = [getattr(Metrics, met) for met in config['metrics']]
    logger.info(metrics)

    # build optimizer and learning rate scheduler; delete every line containing
    # lr_scheduler to disable the scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    logger.info(optimizer)
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler, optimizer)
    logger.info(lr_scheduler)

    cfg_trainer = config['trainer']
    train_writer = TensorboardWriter(config.log_dir, logger,
                                     config['visualization']['tensorboardX'])
    train_metrics = MetricTracker('loss', *[m.__name__ for m in metrics],
                                  writer=train_writer)
    val_writer = TensorboardWriter(config.log_dir, logger,
                                   config['visualization']['tensorboardX'])
    val_metrics = MetricTracker('loss', *[m.__name__ for m in metrics],
                                writer=val_writer)

    for idx in range(1, cfg_trainer['epochs'] + 1):
        log = train_epoch(model, trainDataLoader, device, optimizer, lr_scheduler,
                          logger, train_metrics, criterion, idx, train_writer,
                          metrics)
        logger.info(log)
        val_log = validation_epoch(model, valDataLoader, device, logger,
                                   val_metrics, criterion, idx, val_writer,
                                   metrics)
        logger.info(val_log)
def main(config):
    result_name = '{}_{}_{}way_{}shot'.format(
        config['data_name'],
        config['arch']['base_model'],
        config['general']['way_num'],
        config['general']['shot_num'],
    )
    save_path = os.path.join(config['general']['save_root'], result_name)
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    fout_path = os.path.join(save_path, 'train_info.txt')
    fout_file = open(fout_path, 'a+')
    with open(os.path.join(save_path, 'config.json'), 'w') as handle:
        json.dump(config, handle, indent=4, sort_keys=True)
    print_func(config, fout_file)

    train_trsfms = transforms.Compose([
        transforms.Resize((config['general']['image_size'],
                           config['general']['image_size'])),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ])
    val_trsfms = transforms.Compose([
        transforms.Resize((config['general']['image_size'],
                           config['general']['image_size'])),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ])

    model = ALTNet(**config['arch'])
    print_func(model, fout_file)

    optimizer = optim.Adam(model.parameters(), lr=config['train']['optim_lr'])

    if config['train']['lr_scheduler']['name'] == 'StepLR':
        lr_scheduler = optim.lr_scheduler.StepLR(
            optimizer=optimizer, **config['train']['lr_scheduler']['args'])
    elif config['train']['lr_scheduler']['name'] == 'MultiStepLR':
        lr_scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer=optimizer, **config['train']['lr_scheduler']['args'])
    else:
        raise RuntimeError

    if config['train']['loss']['name'] == 'CrossEntropyLoss':
        criterion = nn.CrossEntropyLoss(**config['train']['loss']['args'])
    else:
        raise RuntimeError

    device, _ = prepare_device(config['n_gpu'])
    model = model.to(device)
    criterion = criterion.to(device)

    best_val_prec1 = 0
    best_test_prec1 = 0
    for epoch_index in range(config['train']['epochs']):
        print_func('{} Epoch {} {}'.format('=' * 35, epoch_index, '=' * 35),
                   fout_file)
        train_dataset = ImageFolder(
            data_root=config['general']['data_root'],
            mode='train',
            episode_num=config['train']['episode_num'],
            way_num=config['general']['way_num'],
            shot_num=config['general']['shot_num'],
            query_num=config['general']['query_num'],
            transform=train_trsfms,
        )
        val_dataset = ImageFolder(
            data_root=config['general']['data_root'],
            mode='val',
            episode_num=config['test']['episode_num'],
            way_num=config['general']['way_num'],
            shot_num=config['general']['shot_num'],
            query_num=config['general']['query_num'],
            transform=val_trsfms,
        )
        test_dataset = ImageFolder(
            data_root=config['general']['data_root'],
            mode='test',
            episode_num=config['test']['episode_num'],
            way_num=config['general']['way_num'],
            shot_num=config['general']['shot_num'],
            query_num=config['general']['query_num'],
            transform=val_trsfms,
        )
        print_func('The num of the train_dataset: {}'.format(len(train_dataset)),
                   fout_file)
        print_func('The num of the val_dataset: {}'.format(len(val_dataset)),
                   fout_file)
        print_func('The num of the test_dataset: {}'.format(len(test_dataset)),
                   fout_file)

        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=config['train']['batch_size'], shuffle=True,
            num_workers=config['general']['workers_num'], drop_last=True,
            pin_memory=True)
        val_loader = torch.utils.data.DataLoader(
            val_dataset, batch_size=config['test']['batch_size'], shuffle=True,
            num_workers=config['general']['workers_num'], drop_last=True,
            pin_memory=True)
        test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=config['test']['batch_size'], shuffle=True,
            num_workers=config['general']['workers_num'], drop_last=True,
            pin_memory=True)

        # train for 5000 episodes in each epoch
        print_func('============ Train on the train set ============', fout_file)
        train(train_loader, model, criterion, optimizer, epoch_index, device,
              fout_file, config['general']['image2level'],
              config['general']['print_freq'])

        print_func('============ Validation on the val set ============', fout_file)
        val_prec1 = validate(val_loader, model, criterion, epoch_index, device,
                             fout_file, config['general']['image2level'],
                             config['general']['print_freq'])
        print_func(' * Prec@1 {:.3f} Best Prec1 {:.3f}'.format(
            val_prec1, best_val_prec1), fout_file)

        print_func('============ Testing on the test set ============', fout_file)
        test_prec1 = validate(test_loader, model, criterion, epoch_index, device,
                              fout_file, config['general']['image2level'],
                              config['general']['print_freq'])
        print_func(' * Prec@1 {:.3f} Best Prec1 {:.3f}'.format(
            test_prec1, best_test_prec1), fout_file)

        if val_prec1 > best_val_prec1:
            best_val_prec1 = val_prec1
            best_test_prec1 = test_prec1
            save_model(model, save_path, config['data_name'], epoch_index,
                       is_best=True)

        if epoch_index % config['general']['save_freq'] == 0 and epoch_index != 0:
            save_model(model, save_path, config['data_name'], epoch_index,
                       is_best=False)

        lr_scheduler.step()

    print_func('............Training is finished............', fout_file)
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch')
    parser.add_argument('--epochs', type=int, default=100000, metavar='N',
                        help='number of epochs to train (default: 100000)')
    parser.add_argument('--lr', type=float, default=0.0001, metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--cuda_dev', type=int, default=0,
                        help='select specific CUDA device for training')
    parser.add_argument('--n_gpu_use', type=int, default=1,
                        help='select number of CUDA devices for training')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='logging training status cadence')
    parser.add_argument('--tensorboard', action='store_true', default=True,
                        help='for logging the model in tensorboard')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if not use_cuda:
        args.n_gpu_use = 0
    device = utils.prepare_device(n_gpu_use=args.n_gpu_use, gpu_id=args.cuda_dev)
    # kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # torch.manual_seed(args.seed)
    # # fix random seeds for reproducibility
    # SEED = 123
    # torch.manual_seed(SEED)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(SEED)

    # configurations
    cfg = GNNWrapper.Config()
    cfg.use_cuda = use_cuda
    cfg.device = device
    cfg.log_interval = args.log_interval
    cfg.tensorboard = args.tensorboard
    # cfg.batch_size = args.batch_size
    # cfg.test_batch_size = args.test_batch_size
    # cfg.momentum = args.momentum
    cfg.dataset_path = './data'
    cfg.epochs = args.epochs
    cfg.lrw = args.lr
    cfg.activation = nn.Sigmoid()
    cfg.state_transition_hidden_dims = [10, ]
    cfg.output_function_hidden_dims = [5]
    cfg.state_dim = 10
    cfg.max_iterations = 50
    cfg.convergence_threshold = 0.01
    cfg.graph_based = False
    cfg.log_interval = 10
    cfg.lrw = 0.01
    cfg.task_type = "multiclass"

    # model creation
    model_tr = GNNWrapper(cfg)
    model_val = GNNWrapper(cfg)
    model_tst = GNNWrapper(cfg)

    # dataset creation
    dset = dataloader.get_subgraph(set="cli_15_7_200", aggregation_type="sum",
                                   sparse_matrix=True)  # generate the dataset
    model_tr(dset["train"])  # dataset initialization into the GNN
    model_val(dset["validation"],
              state_net=model_tr.gnn.state_transition_function,
              out_net=model_tr.gnn.output_function)  # dataset initialization into the GNN
    model_tst(dset["test"],
              state_net=model_tr.gnn.state_transition_function,
              out_net=model_tr.gnn.output_function)  # dataset initialization into the GNN

    # training code
    for epoch in range(1, args.epochs + 1):
        model_tr.train_step(epoch)
        if epoch % 10 == 0:
            model_tst.test_step(epoch)
            model_val.valid_step(epoch)
plot_graph(E1, N1)

E = np.concatenate((E, np.asarray(e2)), axis=0)
N_tot = np.eye(edges + edges_2, dtype=np.float32)
N_tot = np.concatenate((N_tot, np.zeros((edges + edges_2, 1), dtype=np.float32)),
                       axis=1)

# Create Input to GNN
labels = np.random.randint(2, size=(N_tot.shape[0]))
# labels = np.eye(max(labels) + 1, dtype=np.int32)[labels]  # one-hot encoding of labels

cfg = GNNWrapper.Config()
cfg.use_cuda = True
cfg.device = utils.prepare_device(n_gpu_use=1, gpu_id=0)
cfg.tensorboard = False
cfg.epochs = 500
cfg.activation = nn.Tanh()
cfg.state_transition_hidden_dims = [5, ]
cfg.output_function_hidden_dims = [5]
cfg.state_dim = 5
cfg.max_iterations = 50
cfg.convergence_threshold = 0.01
cfg.graph_based = False
cfg.log_interval = 10
cfg.task_type = "multiclass"
cfg.lrw = 0.001
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=100, metavar='N',
                        help='input batch size for testing (default: 100)')
    parser.add_argument('--epochs', type=int, default=100000, metavar='N',
                        help='number of epochs to train (default: 100000)')
    parser.add_argument('--lr', type=float, default=0.0001, metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--cuda_dev', type=int, default=0,
                        help='select specific CUDA device for training')
    parser.add_argument('--n_gpu_use', type=int, default=1,
                        help='select number of CUDA devices for training')
    # parser.add_argument('--seed', type=int, default=1, metavar='S',
    #                     help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='logging training status cadence')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='for saving the current model')
    parser.add_argument('--tensorboard', action='store_true', default=True,
                        help='for logging the model in tensorboard')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if not use_cuda:
        args.n_gpu_use = 0
    device = utils.prepare_device(n_gpu_use=args.n_gpu_use, gpu_id=args.cuda_dev)
    # kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # torch.manual_seed(args.seed)
    # # fix random seeds for reproducibility
    # SEED = 123
    # torch.manual_seed(SEED)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(SEED)

    # configurations
    cfg = GNNWrapper.Config()
    cfg.use_cuda = use_cuda
    cfg.device = device
    cfg.log_interval = args.log_interval
    cfg.tensorboard = args.tensorboard
    # cfg.batch_size = args.batch_size
    # cfg.test_batch_size = args.test_batch_size
    # cfg.momentum = args.momentum
    cfg.dataset_path = './data'
    cfg.epochs = args.epochs
    cfg.lrw = args.lr
    cfg.activation = nn.Tanh()
    cfg.state_transition_hidden_dims = [5, ]
    cfg.output_function_hidden_dims = [5]
    cfg.state_dim = 2
    cfg.max_iterations = 50
    cfg.convergence_threshold = 0.01
    cfg.graph_based = False
    cfg.log_interval = 10
    cfg.task_type = "semisupervised"
    cfg.lrw = 0.001

    # model creation
    model = SemiSupGNNWrapper(cfg)

    # dataset creation
    E, N, targets, mask_train, mask_test = dataloader.old_load_karate()
    dset = dataloader.from_EN_to_GNN(E, N, targets, aggregation_type="sum",
                                     sparse_matrix=True)  # generate the dataset
    dset.idx_train = mask_train
    dset.idx_test = mask_test
    model(dset)  # dataset initialization into the GNN

    # training code
    for epoch in range(1, args.epochs + 1):
        model.train_step(epoch)
        if epoch % 10 == 0:
            model.test_step(epoch)
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=100, metavar='N',
                        help='input batch size for testing (default: 100)')
    parser.add_argument('--epochs', type=int, default=300, metavar='N',
                        help='number of epochs to train (default: 300)')
    parser.add_argument('--lr', type=float, default=0.0001, metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--cuda_dev', type=int, default=0,
                        help='select specific CUDA device for training')
    parser.add_argument('--n_gpu_use', type=int, default=1,
                        help='select number of CUDA devices for training')
    # parser.add_argument('--seed', type=int, default=1, metavar='S',
    #                     help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='logging training status cadence')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='for saving the current model')
    parser.add_argument('--tensorboard', action='store_true', default=True,
                        help='for logging the model in tensorboard')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if not use_cuda:
        args.n_gpu_use = 0
    device = utils.prepare_device(n_gpu_use=args.n_gpu_use, gpu_id=args.cuda_dev)
    # kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # torch.manual_seed(args.seed)
    # # fix random seeds for reproducibility
    # SEED = 123
    # torch.manual_seed(SEED)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(SEED)

    # configurations
    cfg = GNNWrapper.Config()
    cfg.use_cuda = use_cuda
    cfg.device = device
    cfg.log_interval = args.log_interval
    cfg.tensorboard = args.tensorboard
    # cfg.batch_size = args.batch_size
    # cfg.test_batch_size = args.test_batch_size
    # cfg.momentum = args.momentum
    cfg.dataset_path = './data'
    cfg.epochs = args.epochs
    cfg.lrw = args.lr
    cfg.activation = nn.Tanh()
    cfg.state_transition_hidden_dims = [4]
    cfg.output_function_hidden_dims = []
    cfg.state_dim = 2
    cfg.max_iterations = 50
    cfg.convergence_threshold = 0.001
    cfg.graph_based = False
    cfg.log_interval = 10
    cfg.task_type = "semisupervised"
    cfg.lrw = 0.01

    # model creation
    model = SemiSupGNNWrapper(cfg)

    # dataset creation
    dset = dataloader.get_karate(aggregation_type="sum", sparse_matrix=True)  # generate the dataset
    # dset = dataloader.get_twochainsSSE(aggregation_type="sum", percentage=0.1, sparse_matrix=True)  # generate the dataset
    model(dset)  # dataset initialization into the GNN

    # training code
    # plotting utilities
    all_states = []
    all_outs = []
    for epoch in range(1, args.epochs + 1):
        out = model.train_step(epoch)
        all_states.append(model.gnn.converged_states.detach().to("cpu"))
        all_outs.append(out.detach().to("cpu"))
        if epoch % 10 == 0:
            model.test_step(epoch)
    # model.test_step()
    # if args.save_model:
    #     torch.save(model.gnn.state_dict(), "mnist_cnn.pt")

    import matplotlib.animation as animation
    import matplotlib.pyplot as plt
    import networkx as nx

    nx_G = nx.karate_club_graph().to_directed()

    def draw(i):
        clscolor = ['#FF0000', '#0000FF', '#FF00FF', '#00FF00']
        pos = {}
        colors = []
        for v in range(34):
            pos[v] = all_states[i][v].numpy()
            cls = all_outs[i][v].argmax(axis=-1)
            # colors.append(clscolor[cls])
            # print(clscolor[targets[v]])
            colors.append(clscolor[dset.targets[v]])
        ax.cla()
        ax.axis('off')
        ax.set_title('Epoch: %d' % i)
        # node_sha = ["o" for i in range(34)]
        # for j in idx_train:
        #     node_sha[j] = "s"
        node_sizes = np.full((34), 200)
        node_sizes[dset.idx_train.detach().to("cpu").numpy()] = 350
        nx.draw_networkx(nx_G.to_undirected(), pos, node_color=colors,
                         with_labels=True, node_size=node_sizes, ax=ax)
        # nx.draw_networkx(nx_G.to_undirected().subgraph(idx_train), pos,
        #                  node_color=[colors[k] for k in idx_train], node_shape='s',
        #                  with_labels=True, node_size=300, ax=ax)

    fig = plt.figure(dpi=150)
    fig.clf()
    ax = fig.subplots()
    draw(0)  # draw the prediction of the first epoch
    plt.close()

    ani = animation.FuncAnimation(fig, draw, frames=len(all_states), interval=200)
    ani.save('learning.mp4', fps=30, extra_args=['-vcodec', 'libx264'])
def train_main(config):
    """
    Training entry point.
    :param config: ConfigParser object
    :return: None
    """
    logger = config.get_logger('train')  # logger for the training run

    # the data_loader section of the json config tells the CSV manager how to
    # load the data; it sets up the train and test splits
    data_manager = CSVDataManager(config['data_loader'])
    classes = data_manager.classes  # all class names
    num_classes = len(classes)      # number of classes

    trans_type = config['transforms']['type']  # transform name
    trans_args = config['transforms']['args']  # transform arguments
    transformation = getattr(data_module, trans_type)(trans_args)  # build the data transform

    train_data = data_manager.get_loader('train', transformation)  # training set
    val_data = data_manager.get_loader('val', transforms=None)     # validation set

    model_name = config['model']  # model name from the json config
    model = ModelCalled(model_name, num_classes=num_classes)  # instantiate the model
    logger.info(model)  # log the model

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)
    # if torch.cuda.is_available():  # check whether the GPU can be used
    #     model = model.cuda()       # move the model to the GPU

    loss = getattr(net_utils, config['loss'])  # loss function
    # multi-class metrics need the number of classes passed in
    metrics = [getattr(net_utils, met) for met in config['metrics']]

    # filter keeps only the parameters that require gradients
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optim_name = config['optimizer']['type']  # optimizer name
    optim_args = config['optimizer']['args']  # optimizer arguments
    optimizer = getattr(torch.optim, optim_name)(trainable_params, **optim_args)

    lr_name = config['lr_scheduler']['type']  # scheduler name
    lr_args = config['lr_scheduler']['args']  # scheduler arguments
    if lr_name == 'None':
        lr_scheduler = None
    else:
        lr_scheduler = getattr(torch.optim.lr_scheduler, lr_name)(optimizer, **lr_args)

    trainer = Trainer(model=model, loss=loss, metrics=metrics, optimizer=optimizer,
                      config=config, data_loader=train_data,
                      valid_data_loader=val_data, lr_scheduler=lr_scheduler,
                      device=device)
    trainer.train()
def main(result_path, epoch_num):
    config = json.load(open(os.path.join(result_path, 'config.json')))
    fout_path = os.path.join(result_path, 'test_info.txt')
    fout_file = open(fout_path, 'a+')
    print_func(config, fout_file)

    trsfms = transforms.Compose([
        transforms.Resize((config['general']['image_size'],
                           config['general']['image_size'])),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ])

    model = ALTNet(**config['arch'])
    print_func(model, fout_file)

    state_dict = torch.load(
        os.path.join(result_path, '{}_best_model.pth'.format(config['data_name'])))
    model.load_state_dict(state_dict)

    if config['train']['loss']['name'] == 'CrossEntropyLoss':
        criterion = nn.CrossEntropyLoss(**config['train']['loss']['args'])
    else:
        raise RuntimeError

    device, _ = prepare_device(config['n_gpu'])
    model = model.to(device)
    criterion = criterion.to(device)

    total_accuracy = 0.0
    total_h = np.zeros(epoch_num)
    total_accuracy_vector = []
    for epoch_idx in range(epoch_num):
        test_dataset = ImageFolder(
            data_root=config['general']['data_root'],
            mode='test',
            episode_num=600,
            way_num=config['general']['way_num'],
            shot_num=config['general']['shot_num'],
            query_num=config['general']['query_num'],
            transform=trsfms,
        )
        print_func('The num of the test_dataset: {}'.format(len(test_dataset)),
                   fout_file)

        test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=config['test']['batch_size'], shuffle=True,
            num_workers=config['general']['workers_num'], drop_last=True,
            pin_memory=True)

        print_func('============ Testing on the test set ============', fout_file)
        _, accuracies = validate(test_loader, model, criterion, epoch_idx, device,
                                 fout_file, config['general']['image2level'],
                                 config['general']['print_freq'])
        test_accuracy, h = mean_confidence_interval(accuracies)
        print_func("Test Accuracy: {}\t h: {}".format(test_accuracy, h[0]),
                   fout_file)

        total_accuracy += test_accuracy
        total_accuracy_vector.extend(accuracies)
        total_h[epoch_idx] = h

    aver_accuracy, _ = mean_confidence_interval(total_accuracy_vector)
    print_func('Aver Accuracy: {:.3f}\t Aver h: {:.3f}'.format(
        aver_accuracy, total_h.mean()), fout_file)
    print_func('............Testing is finished............', fout_file)
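# `mean_confidence_interval` is not shown in this corpus. A common
# implementation (an assumption — note the snippet above indexes h[0], so the
# project's version may return h per-way rather than as a scalar) computes the
# mean and the 95% CI half-width via Student's t distribution:
import numpy as np
import scipy.stats

def mean_confidence_interval(data, confidence=0.95):
    # assumed implementation: sample mean and CI half-width h
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, h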
def train(config):
    # setup GPU device if available, move model into configured device
    device, device_ids = prepare_device(config["n_gpu"])

    # datasets
    train_datasets = dict()
    valid_datasets = dict()
    ## train
    keys = ["datasets", "train"]
    for name in get_by_path(config, keys):
        train_datasets[name] = config.init_obj([*keys, name])
    ## valid
    valid_exist = False
    keys = ["datasets", "valid"]
    for name in get_by_path(config, keys):
        valid_exist = True
        valid_datasets[name] = config.init_obj([*keys, name])
    ## compute inverse class frequency as class weight
    if config["datasets"].get("imbalanced", False):
        target = train_datasets["data"].y_train  # TODO
        class_weight = compute_class_weight(class_weight="balanced",
                                            classes=target.unique(), y=target)
        class_weight = torch.FloatTensor(class_weight).to(device)
    else:
        class_weight = None

    # losses
    losses = dict()
    for name in config["losses"]:
        kwargs = {}
        if "balanced" in get_by_path(config, ["losses", name, "type"]):
            kwargs.update(class_weight=class_weight)
        losses[name] = config.init_obj(["losses", name], **kwargs)

    # metrics
    metrics_iter = [getattr(module_metric, met)
                    for met in config["metrics"]["per_iteration"]]
    metrics_epoch = [getattr(module_metric, met)
                     for met in config["metrics"]["per_epoch"]]
    if "pick_threshold" in config["metrics"]:
        metrics_threshold = config.init_obj(["metrics", "pick_threshold"])
    else:
        metrics_threshold = None

    torch_objs = {
        "datasets": {
            "train": train_datasets,
            "valid": valid_datasets
        },
        "losses": losses,
        "metrics": {
            "iter": metrics_iter,
            "epoch": metrics_epoch,
            "threshold": metrics_threshold,
        },
    }

    k_fold = config["k_fold"]
    if k_fold > 1:  # cross validation enabled
        train_datasets["data"].split_cv_indexes(k_fold)

    results = pd.DataFrame()
    Cross_Valid.create_CV(k_fold)
    start = time.time()
    for k in range(k_fold):
        # data_loaders
        train_data_loaders = dict()
        valid_data_loaders = dict()
        ## train
        keys = ["data_loaders", "train"]
        for name in get_by_path(config, keys):
            kwargs = {}
            if "imbalanced" in get_by_path(config, [*keys, name, "module"]):
                kwargs.update(class_weight=class_weight.cpu().detach().numpy(),
                              target=target)
            dataset = train_datasets[name]
            loaders = config.init_obj([*keys, name], dataset, **kwargs)
            train_data_loaders[name] = loaders.train_loader
            if not valid_exist:
                valid_data_loaders[name] = loaders.valid_loader
        ## valid
        keys = ["data_loaders", "valid"]
        for name in get_by_path(config, keys):
            dataset = valid_datasets[name]
            loaders = config.init_obj([*keys, name], dataset)
            valid_data_loaders[name] = loaders.valid_loader

        # models
        models = dict()
        logger_model = get_logger("model", verbosity=1)
        for name in config["models"]:
            model = config.init_obj(["models", name])
            logger_model.info(model)
            model = model.to(device)
            if len(device_ids) > 1:
                model = torch.nn.DataParallel(model, device_ids=device_ids)
            models[name] = model

        # optimizers
        optimizers = dict()
        for name in config["optimizers"]:
            trainable_params = filter(lambda p: p.requires_grad,
                                      models[name].parameters())
            optimizers[name] = config.init_obj(["optimizers", name],
                                               trainable_params)

        # learning rate schedulers
        lr_schedulers = dict()
        for name in config["lr_schedulers"]:
            lr_schedulers[name] = config.init_obj(["lr_schedulers", name],
                                                  optimizers[name])

        torch_objs.update({
            "data_loaders": {
                "train": train_data_loaders,
                "valid": valid_data_loaders,
            },
            "models": models,
            "optimizers": optimizers,
            "lr_schedulers": lr_schedulers,
            "amp": None,
        })

        # amp
        if config["trainer"]["kwargs"]["apex"]:
            # TODO: revise here if multiple models and optimizers
            models["model"], optimizers["model"] = amp.initialize(
                models["model"], optimizers["model"], opt_level="O1")
            torch_objs["amp"] = amp

        trainer = config.init_obj(["trainer"], torch_objs, config.save_dir,
                                  config.resume, device)
        train_log = trainer.train()
        results = pd.concat((results, train_log), axis=1)

        # cross validation
        if k_fold > 1:
            Cross_Valid.next_fold()

    msg = msg_box("result")
    end = time.time()
    total_time = consuming_time(start, end)
    msg += f"\nConsuming time: {total_time}."

    result = pd.DataFrame()
    result["mean"] = results.mean(axis=1)
    result["std"] = results.std(axis=1)
    msg += f"\n{result}"
    logger.info(msg)

    return result