Example #1
def main(config):
    logger = config.get_logger('train')
    # device falls back to CPU when no GPU is available
    device, _ = prepare_device(config['n_gpu'])

    # setup data_loader instance for the CVPPP source domain
    data_loader_source = config.init_obj('data_loader_CVPPP', DataLoader)
    valid_data_loader_source = data_loader_source.split_validation()

    # setup data_loader instance for the KOMATSUNA target domain
    data_loader_target = config.init_obj('data_loader_KOMATSUNA', DataLoader)
    valid_data_loader_target = data_loader_target.split_validation()

    # build model architecture, then print to console
    model = config.init_obj('UNET_ADAPT_arch', module_arch)

    logger.info(model)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)

    # get function handles of loss and metrics
    loss_fn_class = getattr(module_loss, config['density_loss'])
    loss_fn_domain = getattr(module_loss, config['domain_loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler; delete every line containing lr_scheduler to disable the scheduler
    #trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer_CVPPP', torch.optim, [
        {
            'params': model.upsample.parameters(),
            'lr': 1e-3
        },
        {
            'params': model.downsample.parameters(),
            'lr': 1e-3
        },
        {
            'params': model.adapt.parameters(),
            'lr': 1e-4
        },
    ])
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler,
                                   optimizer)

    trainer = Trainer(model=model,
                      loss_fn_class=loss_fn_class,
                      loss_fn_domain=loss_fn_domain,
                      metric_ftns=metrics,
                      optimizer=optimizer,
                      config=config,
                      device=device,
                      data_loader_source=data_loader_source,
                      valid_data_loader_source=valid_data_loader_source,
                      data_loader_target=data_loader_target,
                      valid_data_loader_target=valid_data_loader_target,
                      lr_scheduler=lr_scheduler)

    trainer.train()
Example #2
def main(config):
    logger = config.get_logger('train')

    # setup data_loader instances
    data_loader = config.init_obj('data_loader', module_data)
    valid_data_loader = data_loader.get_validation()

    # build model architecture, then print to console
    model = config.init_obj('arch', module_arch)
    logger.info(model)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # get function handles of loss and metrics
    criterion = getattr(module_loss, config['loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler; delete every line containing lr_scheduler to disable the scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler, optimizer)

    trainer = Trainer(model, criterion, metrics, optimizer,
                      config=config,
                      device=device,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader,
                      lr_scheduler=lr_scheduler)

    trainer.train()
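Most of these examples unpack a (device, device_ids) pair from prepare_device(n_gpu) and wrap the model in DataParallel when more than one id comes back. For reference, below is a minimal sketch of such a helper in the style of the pytorch-template utility; the exact implementation in each project may differ.

import torch

def prepare_device(n_gpu_use):
    """Select the training device and the GPU ids usable for DataParallel.

    Minimal sketch: clamps the requested GPU count to what is actually
    available and falls back to CPU when no GPU can be used.
    """
    n_gpu = torch.cuda.device_count()
    if n_gpu_use > 0 and n_gpu == 0:
        print("Warning: no GPU available, training will run on CPU.")
        n_gpu_use = 0
    if n_gpu_use > n_gpu:
        print(f"Warning: {n_gpu_use} GPUs requested but only {n_gpu} are available.")
        n_gpu_use = n_gpu
    device = torch.device('cuda:0' if n_gpu_use > 0 else 'cpu')
    device_ids = list(range(n_gpu_use))
    return device, device_ids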
Example #3
    def __init__(self, config):
        self.config = config

        self.logger = config.get_logger('demo')
        self.device, device_ids = prepare_device(self.logger, config['n_gpu'])

        torch.set_grad_enabled(False)
        self.model = config.init_obj('arch', module_arch)
        self.logger.info('Loading checkpoint: {} ...'.format(config.resume))
        if config['n_gpu'] > 0:
            checkpoint = torch.load(config.resume)
        else:
            checkpoint = torch.load(config.resume,
                                    map_location=torch.device('cpu'))
        state_dict = checkpoint['state_dict']
        if config['n_gpu'] > 1:
            self.model = torch.nn.DataParallel(self.model)
        self.model.load_state_dict(state_dict)
        self.model = self.model.to(self.device)
        self.model.eval()

        self.postprocessor = None
        if 'postprocessor' in config["tester"]:
            module_name = config["tester"]['postprocessor']['type']
            module_args = dict(config["tester"]['postprocessor']['args'])
            self.postprocessor = getattr(postps_crf,
                                         module_name)(**module_args)

        self.demo_dir = None
        self.classes = None
Example #4
    def __init__(self,
                 config: ConfigParser,
                 model: nn.Module,
                 precision_threshold: float = 0.0,
                 recall_threshold: float = 0.0,
                 logger=None):
        self.config = config
        self.logger = logger if logger else config.get_logger('inference')
        self.p_threshold: float = precision_threshold
        self.r_threshold: float = recall_threshold

        self.device, self.device_ids = prepare_device(config['n_gpu'],
                                                      self.logger)
        self.state_dicts = []
        checkpoints = [config.resume] if config.resume is not None else list(
            config.save_dir.glob('**/model_best.pth'))
        for checkpoint in checkpoints:
            self.logger.info(f'Loading checkpoint: {checkpoint} ...')
            state_dict = torch.load(checkpoint,
                                    map_location=self.device)['state_dict']
            self.state_dicts.append(
                {k.replace('module.', ''): v
                 for k, v in state_dict.items()})

        self.model = model
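A hypothetical continuation showing how the collected state_dicts could be consumed for ensembled inference; the predict method below, its signature, and the output averaging are illustrative assumptions, not part of the original class.

    def predict(self, inputs):
        # Hypothetical helper (assumption, not in the source): run every loaded
        # checkpoint over the batch and average the predictions.
        self.model = self.model.to(self.device)
        self.model.eval()
        outputs = []
        with torch.no_grad():
            for state_dict in self.state_dicts:
                self.model.load_state_dict(state_dict)
                outputs.append(self.model(inputs.to(self.device)))
        return torch.stack(outputs).mean(dim=0)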
Example #5
def train(config) -> None:
    setup_logging('train')
    logger = logging.getLogger()
    logger.info(f'Training: {config}')
    seed_everything(config['SEED'])
    # setup data_loader instances
    data_loader = eval(config["DATA_LOADER"]["TYPE"])(**config["DATA_LOADER"]["ARGS"])
    valid_data_loader = data_loader.split_validation()
    # build model architecture, then print to console
    model = create_model(config["MODEL"]["TYPE"])(**config["MODEL"]["ARGS"])
    logger.info(model)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['N_GPU'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # get function handles of loss and metrics
    criterion = eval(config['LOSS']).to(device)
    metrics = [eval(met) for met in config['METRICS']]

    # build optimizer and learning rate scheduler; delete every line containing lr_scheduler to disable the scheduler
    optimizer = create_optimizer(config["OPTIMIZER"]["TYPE"])(**config["OPTIMIZER"]["ARGS"], model=model)
    lr_scheduler, num_epochs = create_scheduler(config["LR_SCHEDULER"]["TYPE"])(**config["LR_SCHEDULER"]["ARGS"],
                                                                                optimizer=optimizer)
    trainer = Trainer(model, criterion, metrics, optimizer,
                      config=config,
                      device=device,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader,
                      lr_scheduler=lr_scheduler)
    trainer.train()
Example #6
    def send_model2device(self, str_cuda_idxs):
        device, device_ids = prepare_device(str_cuda_idxs)
        if len(device_ids) > 1:
            self.multi_gpu = True
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=device_ids)
        self.model = self.model.to(device)
        return device
Example #7
def main(config):
    logger = config.get_logger('train')

    # setup data_loader instances
    data_loader = config.init_obj('data_loader', module_data)
    print("Training Size:", data_loader.dataset.data.shape)
    valid_data_loader = data_loader.split_validation()
    print("Validation Size:", valid_data_loader.dataset.data.shape)

    # build model architecture, then print to console
    model = config.init_obj('arch', module_arch)
    logger.info(model)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # get function handles of loss and metrics
    criterion = getattr(module_loss, config['loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler; delete every line containing lr_scheduler to disable the scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler,
                                   optimizer)

    # construct faults given index and layer
    try:
        layers = config['fault']['layer'].split('_')
        fault_indices = config['fault']['index']
        if isinstance(fault_indices, str):
            fault_indices = [[int(c) for c in fault_index.split(',')]
                             for fault_index in fault_indices.split('_')]
        times = list(map(int, config['fault']['time'].split('_')))
        assert len(times) == len(layers) == len(fault_indices)
        faults = []
        for layer, fault_indice, time in zip(layers, fault_indices, times):
            fault = Fault(fault_layer=layer,
                          fault_index=fault_indice,
                          time=time)
            faults.append(fault)
    except Exception:
        # fall back to no fault injection when the fault config is missing or malformed
        faults = None
    trainer = FaultTrainer(model,
                           criterion,
                           metrics,
                           optimizer,
                           config=config,
                           device=device,
                           data_loader=data_loader,
                           valid_data_loader=valid_data_loader,
                           lr_scheduler=lr_scheduler,
                           fault=faults)

    trainer.train()
Example #8
def main(config):
    logger = get_logger(name=__name__,
                        log_dir=config.log_dir,
                        verbosity=config['trainer']['verbosity'])
    torch.backends.cudnn.benchmark = True
    if config['seed'] is not None:
        torch.manual_seed(config['seed'])
        torch.backends.cudnn.deterministic = True
        np.random.seed(config['seed'])
        random.seed(config['seed'])
        logger.warning('You seeded the training. '
                       'This turns on the CUDNN deterministic setting, '
                       'which can slow down your training. '
                       'You may see unexpected behavior when restarting '
                       'from checkpoints.')
    # setup data_loader instances
    data_loader_obj = config.init_obj('data_loader', module_data)
    data_loader = data_loader_obj.get_train_loader()
    valid_data_loader = data_loader_obj.get_valid_loader()

    # build model architecture, then print to console
    model = config.init_obj('arch', module_arch)
    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    logger.info(
        summary(model,
                input_size=[config['data_loader']['args']['batch_size']] +
                config['input_size'],
                verbose=0))
    logger.info('Trainable parameters: {}'.format(
        sum([p.numel() for p in trainable_params])))

    # get function handles of loss and metrics
    criterion = getattr(module_loss, config['loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer, learning rate scheduler.
    optimizer = config.init_obj('optimizer', torch.optim, model.parameters())
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler,
                                   optimizer)

    trainer = Trainer(model,
                      criterion,
                      metrics,
                      optimizer,
                      config=config,
                      device=device,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader,
                      lr_scheduler=lr_scheduler)

    trainer.train()
Example #9
    def __init__(self, model, metrics, optimizer, config, train_dataset):
        self.config = config
        cfg_trainer: dict = config['trainer']
        self.epochs: int = cfg_trainer['epochs']
        self.save_start_epoch: int = cfg_trainer.get('save_start_epoch', 1)
        self.logger = config.get_logger('trainer', cfg_trainer['verbosity'])

        # setup GPU device if available, move model into configured device
        self.device, device_ids = prepare_device(config['n_gpu'], self.logger)
        self.num_devices = max(len(device_ids), 1)
        self.model = model.to(self.device)
        if self.num_devices > 1:
            self.model = torch.nn.DataParallel(model, device_ids=device_ids)

        max_bpg = self.config['trainer']['max_bpg']
        self.batches_per_optim = cfg_trainer['batch_size']
        self.gradient_accumulation_steps = math.ceil(
            self.batches_per_optim / (max_bpg * self.num_devices))
        batches_per_step = min(self.batches_per_optim,
                               max_bpg * self.num_devices)
        if self.gradient_accumulation_steps > 1:
            self.config['data_loaders']['valid']['args'][
                'batch_size'] = batches_per_step
        self.batches_per_device = math.ceil(batches_per_step /
                                            self.num_devices)
        self.config['data_loaders']['train']['args'][
            'batch_size'] = batches_per_step
        self.data_loader = self.config.init_obj('data_loaders.train',
                                                module_loader, train_dataset)
        self.total_step = len(self.data_loader) * self.epochs
        self.optimization_step_per_epoch = math.ceil(
            len(self.data_loader) / self.gradient_accumulation_steps)
        self.total_optimization_step = self.optimization_step_per_epoch * self.epochs

        self.metrics = metrics
        self.optimizer = optimizer

        # configuration to monitor model performance and save best
        self.monitor: str = cfg_trainer.get('monitor', 'off')
        if self.monitor == 'off':
            self.mnt_mode = 'off'
            self.mnt_best = 0
        else:
            self.mnt_mode, self.mnt_metric = self.monitor.split()
            assert self.mnt_mode in ['min', 'max']

            self.mnt_best = inf if self.mnt_mode == 'min' else -inf
            self.early_stop = cfg_trainer.get('early_stop', inf)

        self.start_epoch = 1

        if config.resume is not None:
            self._resume_checkpoint(config.resume)
Example #10
def main(config):
    logger = config.get_logger('train')
    # setup data_loader instances
    preprocessor = config.init_obj('preprocessor', module_preprocessor)
    train_dataset = config.init_obj('dataset',
                                    module_dataset,
                                    preprocessor,
                                    mode='xeno',
                                    vanilla=True)
    #test_dataset = config.init_obj('dataset', module_dataset, preprocessor, mode = 'soundscape', vanilla = True)

    print("Done with datasets")
    train_data_loader = config.init_obj('data_loader', module_data,
                                        train_dataset)
    valid_data_loader = train_data_loader.split_validation()
    #test_data_loader = config.init_obj('data_loader', module_data, test_dataset)

    # build model architecture, then print to console
    if config['arch']['type'] == 'PretrainedModel':
        wrap = config.init_obj('arch', module_arch)
        model = wrap.get_model()
    else:
        model = config.init_obj('arch', module_arch)

    logger.info(model)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # get function handles of loss and metrics
    criterion = getattr(module_loss, config['loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler; delete every line containing lr_scheduler to disable the scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler,
                                   optimizer)

    trainer = Trainer(model,
                      criterion,
                      metrics,
                      optimizer,
                      config=config,
                      device=device,
                      data_loader=train_data_loader,
                      valid_data_loader=valid_data_loader,
                      lr_scheduler=lr_scheduler)

    trainer.train()
Example #11
def main(config):
    logger = config.get_logger('train')

    # setup data_loader instances
    data_loader = config.init_obj('data_loader', module_data)
    valid_data_loader = data_loader.split_validation()

    # build models architecture, then print to console
    model = config.init_obj('arch', module_arch)
    logger.info(model)

    # model load state dict
    state_dict = torch.load(config.resume)
    model.load_state_dict(state_dict)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # get function handles of loss and metrics
    criterion = getattr(module_loss, config['loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler; delete every line containing lr_scheduler to disable the scheduler
    # freeze some layers for transfer learning

    # for name, param in model.named_parameters():
    #     if not ('output' in name):
    #         param.requires_grad = False

    # add the requires_grad parameter to optimizer
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler, optimizer)

    trainer = Trainer(model, criterion, metrics, optimizer,
                      config=config,
                      device=device,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader,
                      lr_scheduler=lr_scheduler)

    trainer.train()
Example #12
    def __init__(self, model, criterion, metric_ftns, optimizer, config):
        self.config = config
        self.logger = config.get_logger('trainer', config['trainer']['verbosity'])

        if model is not None:
            # setup GPU device if available, move model into configured device
            self.device, device_ids = prepare_device(self.logger, config['n_gpu'])
            self.model = model.to(self.device)
            if len(device_ids) > 1:
                self.model = torch.nn.DataParallel(model, device_ids=device_ids)

            self.criterion = criterion
            self.metric_ftns = metric_ftns
            self.optimizer = optimizer

        cfg_trainer = config['trainer']
        self.epochs = cfg_trainer['epochs']
        self.save_period = cfg_trainer['save_period']
        self.monitor = cfg_trainer.get('monitor', 'off')

        # configuration to monitor model performance and save best
        if self.monitor == 'off':
            self.mnt_mode = 'off'
            self.mnt_best = 0
        else:
            self.mnt_mode, self.mnt_metric = self.monitor.split()
            assert self.mnt_mode in ['min', 'max']

            self.mnt_best = inf if self.mnt_mode == 'min' else -inf
            self.early_stop = cfg_trainer.get('early_stop', inf)

        self.start_epoch = 1

        self.checkpoint_dir = config.save_dir

        # setup visualization writer instance
        self.writer = TensorboardWriter(config.log_dir, self.logger, cfg_trainer['tensorboard'])

        if config.resume is not None:
            self._resume_checkpoint(config.resume)
Example #13
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=100,
                        metavar='N',
                        help='input batch size for testing (default: 100)')
    parser.add_argument('--epochs',
                        type=int,
                        default=100000,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.0001,
                        metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--cuda_dev',
                        type=int,
                        default=0,
                        help='select specific CUDA device for training')
    parser.add_argument('--n_gpu_use',
                        type=int,
                        default=1,
                        help='select number of CUDA device for training')
    # parser.add_argument('--seed', type=int, default=1, metavar='S',
    #                     help='random seed (default: 1)')
    parser.add_argument('--log-interval',
                        type=int,
                        default=100,
                        metavar='N',
                        help='logging training status cadency')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    parser.add_argument('--tensorboard',
                        action='store_true',
                        default=True,
                        help='For logging the model in tensorboard')

    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if not use_cuda:
        args.n_gpu_use = 0

    device = utils.prepare_device(n_gpu_use=args.n_gpu_use,
                                  gpu_id=args.cuda_dev)
    # kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # torch.manual_seed(args.seed)
    # # fix random seeds for reproducibility
    # SEED = 123
    # torch.manual_seed(SEED)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(SEED)

    # configurations
    cfg = SemiSupLPGNNWrapper.Config()
    cfg.use_cuda = use_cuda
    cfg.device = device

    cfg.log_interval = args.log_interval
    cfg.tensorboard = args.tensorboard

    # cfg.batch_size = args.batch_size
    # cfg.test_batch_size = args.test_batch_size
    # cfg.momentum = args.momentum

    cfg.dataset_path = './data'
    cfg.epochs = 1000
    cfg.activation = nn.LeakyReLU()
    cfg.state_transition_hidden_dims = [
        150,
    ]
    cfg.output_function_hidden_dims = [
        30,
    ]
    # cfg.state_dim = [7, 2]
    cfg.state_dim = [350, 150]
    cfg.graph_based = False
    cfg.log_interval = 10
    cfg.lrw = 0.001
    cfg.lrx = 0.003
    cfg.lrλ = 0.003
    cfg.task_type = "semisupervised"
    cfg.layers = len(cfg.state_dim) if isinstance(cfg.state_dim, list) else 1  # number of LPGNN layers from the state_dim list

    # LPGNN
    cfg.eps = 1e-6
    cfg.state_constraint_function = "squared"
    cfg.loss_w = 0.0005
    # model creation  - a unique model
    model = SemiSupLPGNNWrapper(cfg)
    # dataset creation
    dset = dataloader.get_dgl_cora(aggregation_type="sum",
                                   sparse_matrix=True)  # generate the dataset
    #dset = dataloader.get_dgl_citation(aggregation_type="sum") # generate the dataset
    #dset = dataloader.get_dgl_karate(aggregation_type="sum")  # generate the dataset

    model(dset)  # dataset initialization into the GNN

    # training code
    for epoch in range(1, args.epochs + 1):
        model.global_step(epoch)
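Note that this example (like #15, #16, and #23) calls utils.prepare_device(n_gpu_use=..., gpu_id=...) and receives a single device rather than a (device, device_ids) pair. A plausible sketch of that single-device variant, under the assumption that it never sets up more than one GPU:

import torch

def prepare_device(n_gpu_use=1, gpu_id=0):
    """Return a single torch.device (sketch; assumes single-device selection only).

    Falls back to CPU when CUDA is unavailable or n_gpu_use is 0.
    """
    if n_gpu_use > 0 and torch.cuda.is_available():
        return torch.device(f'cuda:{gpu_id}')
    return torch.device('cpu')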
Example #14
            if trainer_start_btn:
                logger = logging.getLogger()
                seed_everything(trainer_seed)
                logger.info(trainer)
                logger.info("seed: {}, save_dir: {}", str(trainer_seed),
                            str(trainer_save_path))
                # data
                data_loader = eval("data_module." + trainer_dataloader +
                                   "DataLoader")(**cfg["DATA_LOADER"]["ARGS"])
                valid_data_loader = data_loader.split_validation()
                # model
                model = eval("model_module." + trainer_model)(
                    **sessions.trainer_params[trainer_id]["model_params"])
                # logger.info(model)
                # gpu
                device, device_ids = prepare_device(cfg['N_GPU'])
                model = model.to(device)
                if len(device_ids) > 1:
                    model = torch.nn.DataParallel(model, device_ids=device_ids)
                # criterion
                criterion = eval("loss_module." + trainer_loss)
                # metrics
                metrics = [
                    eval("metric_module." + met)
                    for met in eval(trainer_metrics)
                ]

                # optimizer
                optimizer = eval("optimizer_module." + trainer_optimizer)(
                    **sessions.trainer_params[trainer_id]["optimizer_params"],
                    params=model.parameters())
Example #15
File: main_chain.py  Project: mtiezzi/lpgnn
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch')
    parser.add_argument('--epochs', type=int, default=100000, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.0001, metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--cuda_dev', type=int, default=0,
                        help='select specific CUDA device for training')
    parser.add_argument('--n_gpu_use', type=int, default=1,
                        help='select number of CUDA device for training')
    # parser.add_argument('--seed', type=int, default=1, metavar='S',
    #                     help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='logging training status cadency')
    # parser.add_argument('--save-model', action='store_true', default=False,
    #                     help='For Saving the current Model')
    parser.add_argument('--tensorboard', action='store_true', default=False,
                        help='For logging the model in tensorboard')

    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if not use_cuda:
        args.n_gpu_use = 0

    device = utils.prepare_device(n_gpu_use=args.n_gpu_use, gpu_id=args.cuda_dev)
    # kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # configurations
    cfg = LPGNNWrapper.Config()
    cfg.use_cuda = use_cuda
    cfg.device = device
    # cfg.seed = SEED

    cfg.log_interval = args.log_interval
    cfg.tensorboard = args.tensorboard

    cfg.dataset_path = './data'
    cfg.epochs = args.epochs
    cfg.activation = nn.Tanh()
    cfg.state_transition_hidden_dims = [10, ]
    cfg.output_function_hidden_dims = [3, ]
    # cfg.state_dim = [7, 2]
    cfg.state_dim = [5, ]
    cfg.graph_based = False
    cfg.log_interval = 300
    cfg.lrw = 0.01
    cfg.lrx = 0.03
    cfg.lrλ = 0.01
    cfg.task_type = "semisupervised"
    cfg.layers = len(cfg.state_dim) if isinstance(cfg.state_dim, list) else 1  # number of LPGNN layers from the state_dim list

    # LPGNN
    cfg.eps = 1e-6
    cfg.state_constraint_function = "eps"
    cfg.loss_w = 0.001
    # model creation  - a unique model
    model = SemiSupLPGNNWrapper(cfg)
    # dataset creation
    #dset = dataloader.get_karate(aggregation_type="sum", sparse_matrix=True)  # generate the dataset
    # dset = dataloader.get_twochainsSSE(aggregation_type="sum", percentage=0.1, sparse_matrix=True)  # generate the dataset
    dset = dataloader.get_twochains(num_nodes_per_graph=1000,
                                    pct_labels=.2,
                                    pct_valid=.2,
                                    sparse_matrix=True)  # generate the dataset
    model(dset)  # dataset initialization into the GNN

    import time
    start_get = time.time()
    # training code
    for epoch in range(args.epochs):
        model.global_step(epoch, start_get)
Example #16
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch')
    parser.add_argument('--epochs',
                        type=int,
                        default=10000,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.0001,
                        metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--cuda_dev',
                        type=int,
                        default=0,
                        help='select specific CUDA device for training')
    parser.add_argument('--n_gpu_use',
                        type=int,
                        default=1,
                        help='select number of CUDA device for training')
    parser.add_argument('--log-interval',
                        type=int,
                        default=50,
                        metavar='N',
                        help='logging training status cadency')
    parser.add_argument('--tensorboard',
                        action='store_true',
                        default=True,
                        help='For logging the model in tensorboard')

    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if not use_cuda:
        args.n_gpu_use = 0

    device = utils.prepare_device(n_gpu_use=args.n_gpu_use,
                                  gpu_id=args.cuda_dev)
    # kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # torch.manual_seed(args.seed)
    # # fix random seeds for reproducibility
    # SEED = 123
    # torch.manual_seed(SEED)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(SEED)

    # configurations
    cfg = GNNWrapper.Config()
    cfg.use_cuda = use_cuda
    cfg.device = device

    cfg.log_interval = args.log_interval
    cfg.tensorboard = args.tensorboard

    # cfg.batch_size = args.batch_size
    # cfg.test_batch_size = args.test_batch_size
    # cfg.momentum = args.momentum

    cfg.dataset_path = './data'
    cfg.epochs = args.epochs
    cfg.lrw = args.lr
    cfg.activation = nn.Sigmoid()
    cfg.state_transition_hidden_dims = [
        10,
    ]
    cfg.output_function_hidden_dims = [5]
    cfg.state_dim = 10  #
    cfg.max_iterations = 50
    cfg.convergence_threshold = 0.01
    cfg.graph_based = False
    cfg.log_interval = 10
    cfg.lrw = 0.01
    cfg.task_type = "multiclass"

    # model creation
    # model_tr = GNNWrapper(cfg)
    # model_val = GNNWrapper(cfg)
    # model_tst = GNNWrapper(cfg)

    cfg.dset_name = "sub_30_15_200"
    cfg.aggregation_type = "degreenorm"
    # dataset creation
    dset = dataloader.get_subgraph(set=cfg.dset_name,
                                   aggregation_type=cfg.aggregation_type,
                                   sparse_matrix=True)  # generate the dataset

    cfg.label_dim = dset["train"].node_label_dim

    state_nets = [
        net.StateTransition(cfg.state_dim,
                            cfg.label_dim,
                            mlp_hidden_dim=cfg.state_transition_hidden_dims,
                            activation_function=cfg.activation),
        net.GINTransition(cfg.state_dim,
                          cfg.label_dim,
                          mlp_hidden_dim=cfg.state_transition_hidden_dims,
                          activation_function=cfg.activation),
        net.GINPreTransition(cfg.state_dim,
                             cfg.label_dim,
                             mlp_hidden_dim=cfg.state_transition_hidden_dims,
                             activation_function=cfg.activation)
    ]

    lrs = [0.05, 0.01, 0.001]

    hyperparameters = dict(lr=lrs, state_net=state_nets)
    hyperparameters_values = [v for v in hyperparameters.values()]

    start_0 = time.time()
    for lr, state_net in product(*hyperparameters_values):
        cfg.lrw = lr
        cfg.state_net = state_net

        print(
            f"learning_rate:{lr}, state_dim:{cfg.state_dim}, aggregation function:{str(state_net).split('(')[0]} "
        )
        # model creation
        model_tr = GNNWrapper(cfg)
        model_val = GNNWrapper(cfg)
        model_tst = GNNWrapper(cfg)

        # 24.3.21 STOPPER
        early_stopper = utils.EarlyStopper(cfg)

        model_tr(dset["train"],
                 state_net=state_net)  # dataset initalization into the GNN
        model_val(dset["validation"],
                  state_net=model_tr.gnn.state_transition_function,
                  out_net=model_tr.gnn.output_function
                  )  # dataset initalization into the GNN
        model_tst(dset["test"],
                  state_net=model_tr.gnn.state_transition_function,
                  out_net=model_tr.gnn.output_function
                  )  # dataset initalization into the GNN
        # training code
        start = time.time()
        for epoch in range(1, args.epochs + 1):
            acc_train = model_tr.train_step(epoch)
            if epoch % 10 == 0:
                acc_tst = model_tst.test_step(epoch)
                acc_val = model_val.valid_step(epoch)
                stp = early_stopper(acc_train, acc_val, acc_tst, epoch)

                # return -1 keeps training the model!
                if stp == -1:
                    print(
                        f"{early_stopper.best_epoch}, \t {early_stopper.best_train}, \t, {early_stopper.best_val}, \t {early_stopper.best_test}"
                    )
                    break
                # model_tst.test_step(epoch)

        time_sample = time.time() - start
        print(f"time taken for one set: {str(time_sample)} seconds")

    time_whole = time.time() - start_0
    print(f"time taken for the whole experiment: {str(time_whole)} seconds")
Example #17
    def send_model2device(self, str_cuda_idxs):
        device, device_ids = prepare_device(str_cuda_idxs)
        assert len(device_ids) <= 1, \
            "multi-gpu mode for computing embeddings is not supported now"
        self.model = self.model.to(device)
        return device
Example #18
def main(config):

    # set random seed

    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    random.seed(config.seed)

    # prepare hardware acceleration

    device, gpu_device_ids = prepare_device(config.num_gpu)

    if "cuda" in str(device):
        cp.print_green(f"utilizing gpu devices : {gpu_device_ids}")
        torch.cuda.manual_seed(config.seed)

    # Prepare model

    model_config = load_json(config.model_config)
    model_class = getattr(model_modules, model_config["model"])
    model = model_class(model_config["config"])

    trained_model = model_config["trained_model"]
    cp.print_green(f"pretrained model: {trained_model}")

    model.load_state_dict(torch.load(trained_model, map_location=device),
                          strict=False)
    cp.print_green("model:\n", model)

    activation = None
    if "activation" in model_config:
        activation = getattr(torch, model_config["activation"], None)
    cp.print_green("activation: ", type(activation).__name__)

    # Prepare DataLoader

    data_config = load_json(config.data_config)
    data_loader_class = getattr(data_loader_modules,
                                data_config["data_loader"])

    # Prepare file handler

    output_path = config.output_folder + "/" + type(model).__name__

    cp.print_green(f"file type: {config.file_type}")
    cp.print_green(f"output folder: {output_path}")

    # Prepare extraction

    if len(gpu_device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=gpu_device_ids)

    model.eval()
    model.to(device)

    # Extract train features

    train_data_loader = data_loader_class(data_config["train_config"])
    train_file_handler = file_handler_modules.handler_mapping[
        config.file_type](output_path + "/train")

    meta = extract_feature(model, activation, train_data_loader,
                           train_file_handler, device)

    cp.print_green('train meta file:\n', meta)

    train_file_handler.generate_meta_file(meta)

    del train_file_handler, train_data_loader

    # Extract test features

    test_data_loader = data_loader_class(data_config["test_config"])
    test_file_handler = file_handler_modules.handler_mapping[config.file_type](
        output_path + "/test")

    meta = extract_feature(model, activation, test_data_loader,
                           test_file_handler, device)

    cp.print_green('test meta file:\n', meta)

    test_file_handler.generate_meta_file(meta)
Example #19
def main(args):
    if not check_exists(args.save_dir):
        os.makedirs(args.save_dir)

    dataset = IQiYiFineTuneSceneDataset(args.data_root,
                                        'train+val-noise',
                                        image_root='/home/dcq/img')

    data_loader = DataLoader(dataset,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=4)

    log_step = len(data_loader) // 10 if len(data_loader) > 10 else 1

    model = ArcFaceSEResNeXtModel(args.num_classes, include_top=True)
    metric_func = ArcMarginProduct()
    loss_func = FocalLoss(gamma=2.)

    trainable_params = [
        {
            'params': model.base_model.parameters(),
            "lr": args.learning_rate / 100
        },
        {
            'params': model.weight
        },
    ]

    optimizer = optim.SGD(trainable_params,
                          lr=args.learning_rate,
                          momentum=0.9,
                          weight_decay=1e-5)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epoch)

    device, device_ids = prepare_device()
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    for epoch_idx in range(args.epoch):
        total_loss = .0
        for batch_idx, (images, labels, _) in enumerate(data_loader):
            images = images.view(-1, *images.size()[-3:])
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model(images)
            outputs = outputs.view(outputs.size(0) // 3, 3, -1)
            outputs = torch.mean(outputs, dim=1)
            outputs_metric = metric_func(outputs, labels)
            local_loss = loss_func(outputs_metric, labels)

            local_loss.backward()
            optimizer.step()

            total_loss += local_loss.item()

            if batch_idx % log_step == 0 and batch_idx != 0:
                print('Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}'.format(
                    epoch_idx, batch_idx * args.batch_size, len(dataset),
                    100.0 * batch_idx / len(data_loader), local_loss.item()))

        log = {
            'epoch': epoch_idx,
            'lr': optimizer.param_groups[0]['lr'],
            'loss': total_loss / len(data_loader)
        }

        for key, value in sorted(log.items(), key=lambda item: item[0]):
            print('    {:20s}: {:6f}'.format(str(key), value))

        lr_scheduler.step()

    save_model(model.module, args.save_dir, 'demo_arcface_fine_tune_model',
               args.epoch)
Example #20
def test(config):
    # setup GPU device if available, move model into configured device
    device, device_ids = prepare_device(config["n_gpu"])

    # datasets
    test_datasets = dict()
    keys = ["datasets", "test"]
    for name in get_by_path(config, keys):
        test_datasets[name] = config.init_obj([*keys, name])

    results = pd.DataFrame()
    k_fold = config["k_fold"]
    Cross_Valid.create_CV(k_fold)
    start = time.time()
    for k in range(k_fold):
        # data_loaders
        test_data_loaders = dict()
        keys = ["data_loaders", "test"]
        for name in get_by_path(config, keys):
            dataset = test_datasets[name]
            loaders = config.init_obj([*keys, name], dataset)
            test_data_loaders[name] = loaders.test_loader

        # models
        if k_fold > 1:
            fold_prefix = f"fold_{k}_"
            dirname = os.path.dirname(config.resume)
            basename = os.path.basename(config.resume)
            resume = os.path.join(dirname, fold_prefix + basename)
        else:
            resume = config.resume
        logger.info(f"Loading model: {resume} ...")
        checkpoint = torch.load(resume)
        models = dict()
        logger_model = get_logger("model", verbosity=0)
        for name in config["models"]:
            model = config.init_obj(["models", name])
            logger_model.info(model)
            state_dict = checkpoint["models"][name]
            if config["n_gpu"] > 1:
                model = torch.nn.DataParallel(model)
            model.load_state_dict(state_dict)
            model = model.to(device)
            model.eval()
            models[name] = model
        model = models["model"]

        # losses
        loss_fn = config.init_obj(["losses", "loss"])

        # metrics
        metrics_epoch = [
            getattr(module_metric, met)
            for met in config["metrics"]["per_epoch"]
        ]
        keys_epoch = [m.__name__ for m in metrics_epoch]
        test_metrics = MetricTracker([], keys_epoch)
        if "pick_threshold" in config["metrics"]:
            threshold = checkpoint["threshold"]
            setattr(module_metric, "THRESHOLD", threshold)
            logger.info(f"threshold: {threshold}")

        with torch.no_grad():
            print("testing...")
            test_loader = test_data_loaders["data"]

            if len(metrics_epoch) > 0:
                outputs = torch.FloatTensor().to(device)
                targets = torch.FloatTensor().to(device)
            for batch_idx, (data, target) in enumerate(test_loader):
                data, target = data.to(device), target.to(device)

                output = model(data)
                if len(metrics_epoch) > 0:
                    outputs = torch.cat((outputs, output))
                    targets = torch.cat((targets, target))

                #
                # save sample images, or do something with output here
                #

            for met in metrics_epoch:
                test_metrics.epoch_update(met.__name__, met(targets, outputs))

        test_log = test_metrics.result()
        test_log = test_log["mean"].rename(k)
        results = pd.concat((results, test_log), axis=1)
        logger.info(test_log)

        # cross validation
        if k_fold > 1:
            Cross_Valid.next_fold()

    msg = msg_box("result")

    end = time.time()
    total_time = consuming_time(start, end)
    msg += f"\nConsuming time: {total_time}."

    result = pd.DataFrame()
    result["mean"] = results.mean(axis=1)
    result["std"] = results.std(axis=1)
    msg += f"\n{result}"

    logger.info(msg)

    # bootstrap
    if config.test_args.bootstrapping:
        assert k_fold == 1, "k-fold ensemble and bootstrap are mutually exclusive."
        N = config.test_args.bootstrap_times
        bootstrapping(targets, outputs, metrics_epoch, test_metrics, repeat=N)
Example #21
File: main.py  Project: dwidemann/base_repo
def main(config):
    logger = config.get_logger('trainer', config['trainer']['verbosity'])
    # print logged informations to the screen
    pprint.pprint(config.config)

    trainDataLoader = config.init_obj('data_loader', dataset_classes)
    X, y = next(iter(trainDataLoader))
    print(X.shape)

    config_test = deepcopy(config)
    config_test.config['data_loader']['training'] = False
    valDataLoader = config_test.init_obj('data_loader', dataset_classes)

    model = config.init_obj('arch', model_classes)
    logger.info(model)

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)
    logger.info(device)
    logger.info(device_ids)

    # get function handles of loss and metrics
    criterion = getattr(losses, config['loss'])
    logger.info(criterion)
    metrics = [getattr(Metrics, met) for met in config['metrics']]
    logger.info(metrics)

    # build optimizer and learning rate scheduler; delete every line containing lr_scheduler to disable the scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    logger.info(optimizer)
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler,
                                   optimizer)
    logger.info(lr_scheduler)

    cfg_trainer = config['trainer']
    train_writer = TensorboardWriter(config.log_dir, logger,
                                     config['visualization']['tensorboardX'])
    train_metrics = MetricTracker('loss',
                                  *[m.__name__ for m in metrics],
                                  writer=train_writer)

    val_writer = TensorboardWriter(config.log_dir, logger,
                                   config['visualization']['tensorboardX'])
    val_metrics = MetricTracker('loss',
                                *[m.__name__ for m in metrics],
                                writer=val_writer)

    for idx in range(1, cfg_trainer['epochs'] + 1):
        log = train_epoch(model, trainDataLoader, device, optimizer,
                          lr_scheduler, logger, train_metrics, criterion, idx,
                          train_writer, metrics)
        logger.info(log)

        val_log = validation_epoch(model, valDataLoader, device, logger,
                                   val_metrics, criterion, idx, val_writer,
                                   metrics)
        logger.info(val_log)
Example #22
File: trainer.py  Project: ccfbupt/ATL-Net
def main(config):
    result_name = '{}_{}_{}way_{}shot'.format(
        config['data_name'],
        config['arch']['base_model'],
        config['general']['way_num'],
        config['general']['shot_num'],
    )
    save_path = os.path.join(config['general']['save_root'], result_name)
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    fout_path = os.path.join(save_path, 'train_info.txt')
    fout_file = open(fout_path, 'a+')
    with open(os.path.join(save_path, 'config.json'), 'w') as handle:
        json.dump(config, handle, indent=4, sort_keys=True)
    print_func(config, fout_file)

    train_trsfms = transforms.Compose([
        transforms.Resize((config['general']['image_size'],
                           config['general']['image_size'])),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ])

    val_trsfms = transforms.Compose([
        transforms.Resize((config['general']['image_size'],
                           config['general']['image_size'])),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ])

    model = ALTNet(**config['arch'])
    print_func(model, fout_file)

    optimizer = optim.Adam(model.parameters(), lr=config['train']['optim_lr'])

    if config['train']['lr_scheduler']['name'] == 'StepLR':
        lr_scheduler = optim.lr_scheduler.StepLR(
            optimizer=optimizer, **config['train']['lr_scheduler']['args'])
    elif config['train']['lr_scheduler']['name'] == 'MultiStepLR':
        lr_scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer=optimizer, **config['train']['lr_scheduler']['args'])
    else:
        raise RuntimeError

    if config['train']['loss']['name'] == 'CrossEntropyLoss':
        criterion = nn.CrossEntropyLoss(**config['train']['loss']['args'])
    else:
        raise RuntimeError

    device, _ = prepare_device(config['n_gpu'])
    model = model.to(device)
    criterion = criterion.to(device)

    best_val_prec1 = 0
    best_test_prec1 = 0
    for epoch_index in range(config['train']['epochs']):
        print_func('{} Epoch {} {}'.format('=' * 35, epoch_index, '=' * 35),
                   fout_file)
        train_dataset = ImageFolder(
            data_root=config['general']['data_root'],
            mode='train',
            episode_num=config['train']['episode_num'],
            way_num=config['general']['way_num'],
            shot_num=config['general']['shot_num'],
            query_num=config['general']['query_num'],
            transform=train_trsfms,
        )
        val_dataset = ImageFolder(
            data_root=config['general']['data_root'],
            mode='val',
            episode_num=config['test']['episode_num'],
            way_num=config['general']['way_num'],
            shot_num=config['general']['shot_num'],
            query_num=config['general']['query_num'],
            transform=val_trsfms,
        )
        test_dataset = ImageFolder(
            data_root=config['general']['data_root'],
            mode='test',
            episode_num=config['test']['episode_num'],
            way_num=config['general']['way_num'],
            shot_num=config['general']['shot_num'],
            query_num=config['general']['query_num'],
            transform=val_trsfms,
        )

        print_func(
            'The num of the train_dataset: {}'.format(len(train_dataset)),
            fout_file)
        print_func('The num of the val_dataset: {}'.format(len(val_dataset)),
                   fout_file)
        print_func('The num of the test_dataset: {}'.format(len(test_dataset)),
                   fout_file)

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=config['train']['batch_size'],
            shuffle=True,
            num_workers=config['general']['workers_num'],
            drop_last=True,
            pin_memory=True)
        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=config['test']['batch_size'],
            shuffle=True,
            num_workers=config['general']['workers_num'],
            drop_last=True,
            pin_memory=True)
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=config['test']['batch_size'],
            shuffle=True,
            num_workers=config['general']['workers_num'],
            drop_last=True,
            pin_memory=True)

        # train for 5000 episodes in each epoch
        print_func('============ Train on the train set ============',
                   fout_file)
        train(train_loader, model, criterion, optimizer, epoch_index, device,
              fout_file, config['general']['image2level'],
              config['general']['print_freq'])

        print_func('============ Validation on the val set ============',
                   fout_file)
        val_prec1 = validate(val_loader, model, criterion, epoch_index, device,
                             fout_file, config['general']['image2level'],
                             config['general']['print_freq'])
        print_func(
            ' * Prec@1 {:.3f} Best Prec1 {:.3f}'.format(
                val_prec1, best_val_prec1), fout_file)

        print_func('============ Testing on the test set ============',
                   fout_file)
        test_prec1 = validate(test_loader, model, criterion, epoch_index,
                              device, fout_file,
                              config['general']['image2level'],
                              config['general']['print_freq'])
        print_func(
            ' * Prec@1 {:.3f} Best Prec1 {:.3f}'.format(
                test_prec1, best_test_prec1), fout_file)

        if val_prec1 > best_val_prec1:
            best_val_prec1 = val_prec1
            best_test_prec1 = test_prec1
            save_model(model,
                       save_path,
                       config['data_name'],
                       epoch_index,
                       is_best=True)

        if epoch_index % config['general'][
                'save_freq'] == 0 and epoch_index != 0:
            save_model(model,
                       save_path,
                       config['data_name'],
                       epoch_index,
                       is_best=False)

        lr_scheduler.step()

    print_func('............Training is end............', fout_file)
Example #23
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch')
    parser.add_argument('--epochs',
                        type=int,
                        default=100000,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.0001,
                        metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--cuda_dev',
                        type=int,
                        default=0,
                        help='select specific CUDA device for training')
    parser.add_argument('--n_gpu_use',
                        type=int,
                        default=1,
                        help='select number of CUDA device for training')
    parser.add_argument('--log-interval',
                        type=int,
                        default=50,
                        metavar='N',
                        help='logging training status cadency')
    parser.add_argument('--tensorboard',
                        action='store_true',
                        default=True,
                        help='For logging the model in tensorboard')

    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if not use_cuda:
        args.n_gpu_use = 0

    device = utils.prepare_device(n_gpu_use=args.n_gpu_use,
                                  gpu_id=args.cuda_dev)
    # kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # torch.manual_seed(args.seed)
    # # fix random seeds for reproducibility
    # SEED = 123
    # torch.manual_seed(SEED)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(SEED)

    # configurations
    cfg = GNNWrapper.Config()
    cfg.use_cuda = use_cuda
    cfg.device = device

    cfg.log_interval = args.log_interval
    cfg.tensorboard = args.tensorboard

    # cfg.batch_size = args.batch_size
    # cfg.test_batch_size = args.test_batch_size
    # cfg.momentum = args.momentum

    cfg.dataset_path = './data'
    cfg.epochs = args.epochs
    cfg.lrw = args.lr
    cfg.activation = nn.Sigmoid()
    cfg.state_transition_hidden_dims = [
        10,
    ]
    cfg.output_function_hidden_dims = [5]
    cfg.state_dim = 10
    cfg.max_iterations = 50
    cfg.convergence_threshold = 0.01
    cfg.graph_based = False
    cfg.log_interval = 10  # overrides the value taken from args.log_interval above
    cfg.lrw = 0.01  # overrides the learning rate taken from args.lr above
    cfg.task_type = "multiclass"

    # model creation
    model_tr = GNNWrapper(cfg)
    model_val = GNNWrapper(cfg)
    model_tst = GNNWrapper(cfg)
    # dataset creation
    dset = dataloader.get_subgraph(set="cli_15_7_200",
                                   aggregation_type="sum",
                                   sparse_matrix=True)  # generate the dataset
    model_tr(dset["train"])  # dataset initalization into the GNN
    model_val(dset["validation"],
              state_net=model_tr.gnn.state_transition_function,
              out_net=model_tr.gnn.output_function
              )  # dataset initialization into the GNN
    model_tst(dset["test"],
              state_net=model_tr.gnn.state_transition_function,
              out_net=model_tr.gnn.output_function
              )  # dataset initialization into the GNN

    # training code
    for epoch in range(1, args.epochs + 1):
        model_tr.train_step(epoch)
        if epoch % 10 == 0:
            model_tst.test_step(epoch)
            model_val.valid_step(epoch)
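
The utils.prepare_device helper called at the top of this example (and in Examples #25 and #26) is not part of the snippet. A minimal sketch, assuming it simply returns a single torch.device and falls back to the CPU when no GPU is requested or available:

import torch


def prepare_device(n_gpu_use=1, gpu_id=0):
    # Assumed behaviour: pick the requested CUDA device, otherwise use the CPU.
    if n_gpu_use > 0 and torch.cuda.is_available():
        return torch.device('cuda:{}'.format(gpu_id))
    return torch.device('cpu')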
Example #24
plot_graph(E1, N1)

E = np.concatenate((E, np.asarray(e2)), axis=0)
N_tot = np.eye(edges + edges_2, dtype=np.float32)
N_tot = np.concatenate((N_tot, np.zeros(
    (edges + edges_2, 1), dtype=np.float32)),
                       axis=1)

# Create Input to GNN

labels = np.random.randint(2, size=(N_tot.shape[0]))
#labels = np.eye(max(labels)+1, dtype=np.int32)[labels]  # one-hot encoding of labels

cfg = GNNWrapper.Config()
cfg.use_cuda = True
cfg.device = utils.prepare_device(n_gpu_use=1, gpu_id=0)
cfg.tensorboard = False
cfg.epochs = 500

cfg.activation = nn.Tanh()
cfg.state_transition_hidden_dims = [
    5,
]
cfg.output_function_hidden_dims = [5]
cfg.state_dim = 5
cfg.max_iterations = 50
cfg.convergence_threshold = 0.01
cfg.graph_based = False
cfg.log_interval = 10
cfg.task_type = "multiclass"
cfg.lrw = 0.001
Example #25
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=100, metavar='N',
                        help='input batch size for testing (default: 100)')
    parser.add_argument('--epochs', type=int, default=100000, metavar='N',
                        help='number of epochs to train (default: 100000)')
    parser.add_argument('--lr', type=float, default=0.0001, metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--cuda_dev', type=int, default=0,
                        help='select specific CUDA device for training')
    parser.add_argument('--n_gpu_use', type=int, default=1,
                        help='select number of CUDA devices for training')
    # parser.add_argument('--seed', type=int, default=1, metavar='S',
    #                     help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    parser.add_argument('--tensorboard', action='store_true', default=True,
                        help='For logging the model in tensorboard')

    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if not use_cuda:
        args.n_gpu_use = 0

    device = utils.prepare_device(n_gpu_use=args.n_gpu_use, gpu_id=args.cuda_dev)
    # kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # torch.manual_seed(args.seed)
    # # fix random seeds for reproducibility
    # SEED = 123
    # torch.manual_seed(SEED)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(SEED)

    # configurations
    cfg = GNNWrapper.Config()
    cfg.use_cuda = use_cuda
    cfg.device = device

    cfg.log_interval = args.log_interval
    cfg.tensorboard = args.tensorboard

    # cfg.batch_size = args.batch_size
    # cfg.test_batch_size = args.test_batch_size
    # cfg.momentum = args.momentum

    cfg.dataset_path = './data'
    cfg.epochs = args.epochs
    cfg.lrw = args.lr
    cfg.activation = nn.Tanh()
    cfg.state_transition_hidden_dims = [5,]
    cfg.output_function_hidden_dims = [5]
    cfg.state_dim = 2
    cfg.max_iterations = 50
    cfg.convergence_threshold = 0.01
    cfg.graph_based = False
    cfg.log_interval = 10  # overrides the value taken from args.log_interval above
    cfg.task_type = "semisupervised"

    cfg.lrw = 0.001  # overrides the learning rate taken from args.lr above

    # model creation
    model = SemiSupGNNWrapper(cfg)
    # dataset creation
    E, N, targets, mask_train, mask_test = dataloader.old_load_karate()
    dset = dataloader.from_EN_to_GNN(E, N, targets, aggregation_type="sum", sparse_matrix=True)  # generate the dataset
    dset.idx_train = mask_train
    dset.idx_test = mask_test
    model(dset)  # dataset initialization into the GNN

    # training code
    for epoch in range(1, args.epochs + 1):
        model.train_step(epoch)

        if epoch % 10 == 0:
            model.test_step(epoch)
Example #26
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=100, metavar='N',
                        help='input batch size for testing (default: 100)')
    parser.add_argument('--epochs', type=int, default=300, metavar='N',
                        help='number of epochs to train (default: 300)')
    parser.add_argument('--lr', type=float, default=0.0001, metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--cuda_dev', type=int, default=0,
                        help='select specific CUDA device for training')
    parser.add_argument('--n_gpu_use', type=int, default=1,
                        help='select number of CUDA devices for training')
    # parser.add_argument('--seed', type=int, default=1, metavar='S',
    #                     help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=50, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    parser.add_argument('--tensorboard', action='store_true', default=True,
                        help='For logging the model in tensorboard')

    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if not use_cuda:
        args.n_gpu_use = 0

    device = utils.prepare_device(n_gpu_use=args.n_gpu_use, gpu_id=args.cuda_dev)
    # kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # torch.manual_seed(args.seed)
    # # fix random seeds for reproducibility
    # SEED = 123
    # torch.manual_seed(SEED)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # np.random.seed(SEED)

    # configurations
    cfg = GNNWrapper.Config()
    cfg.use_cuda = use_cuda
    cfg.device = device

    cfg.log_interval = args.log_interval
    cfg.tensorboard = args.tensorboard

    # cfg.batch_size = args.batch_size
    # cfg.test_batch_size = args.test_batch_size
    # cfg.momentum = args.momentum

    cfg.dataset_path = './data'
    cfg.epochs = args.epochs
    cfg.lrw = args.lr
    cfg.activation = nn.Tanh()
    cfg.state_transition_hidden_dims = [4]
    cfg.output_function_hidden_dims = []
    cfg.state_dim = 2
    cfg.max_iterations = 50
    cfg.convergence_threshold = 0.001
    cfg.graph_based = False
    cfg.log_interval = 10  # overrides the value taken from args.log_interval above
    cfg.task_type = "semisupervised"

    cfg.lrw = 0.01  # overrides the learning rate taken from args.lr above

    # model creation
    model = SemiSupGNNWrapper(cfg)
    # dataset creation
    dset = dataloader.get_karate(aggregation_type="sum", sparse_matrix=True)  # generate the dataset
    #dset = dataloader.get_twochainsSSE(aggregation_type="sum", percentage=0.1, sparse_matrix=True)  # generate the dataset
    model(dset)  # dataset initialization into the GNN

    # training code

    # plotting utilities
    all_states = []
    all_outs = []
    for epoch in range(1, args.epochs + 1):
        out = model.train_step(epoch)
        all_states.append(model.gnn.converged_states.detach().to("cpu"))
        all_outs.append(out.detach().to("cpu"))

        if epoch % 10 == 0:
            model.test_step(epoch)
    # model.test_step()

    # if args.save_model:
    #     torch.save(model.gnn.state_dict(), "mnist_cnn.pt")

    import matplotlib.animation as animation
    import matplotlib.pyplot as plt
    import networkx as nx
    nx_G = nx.karate_club_graph().to_directed()

    def draw(i):
        clscolor = ['#FF0000', '#0000FF', '#FF00FF', '#00FF00']
        pos = {}
        colors = []
        for v in range(34):
            pos[v] = all_states[i][v].numpy()
            cls = all_outs[i][v].argmax(axis=-1)
            # colors.append(clscolor[cls])
            # print(clscolor[targets[v]])
            colors.append(clscolor[dset.targets[v]])
        ax.cla()
        ax.axis('off')
        ax.set_title('Epoch: %d' % i)
        #     node_sha = ["o" for i in range(34)]
        #     for j in idx_train:
        #         node_sha[j] = "s"
        node_sizes = np.full((34), 200)
        node_sizes[dset.idx_train.detach().to("cpu").numpy()] = 350
        nx.draw_networkx(nx_G.to_undirected(), pos, node_color=colors,
                         with_labels=True, node_size=node_sizes, ax=ax)

    #     nx.draw_networkx(nx_G.to_undirected().subgraph(idx_train), pos, node_color=[colors[k] for k in idx_train], node_shape='s',
    #             with_labels=True, node_size=300, ax=ax)

    fig = plt.figure(dpi=150)
    fig.clf()
    ax = fig.subplots()
    draw(0)  # draw the prediction of the first epoch
    plt.close()

    ani = animation.FuncAnimation(fig, draw, frames=len(all_states), interval=200)
    ani.save('learning.mp4', fps=30, extra_args=['-vcodec', 'libx264'])
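
Writing the mp4 with extra_args=['-vcodec', 'libx264'] requires ffmpeg to be installed. If ffmpeg is unavailable, the same animation can be written as a GIF with matplotlib's built-in PillowWriter, for example:

    # alternative when ffmpeg is not installed: save an animated GIF instead
    ani.save('learning.gif', writer=animation.PillowWriter(fps=30))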
Example #27
def train_main(config):
    """
    训练函数
    :param config: ConfigParser对象
    :return: None
    """
    logger = config.get_logger('train')  # 训练数据的日志对象

    data_manager = CSVDataManager(
        config['data_loader'])  # 将json文件中指示数据载入要求信息传入CSV管理器中,装载训练集与测试集
    classes = data_manager.classes  # 获取所有类别
    num_classes = len(classes)  # 知晓类别数量

    trans_type = config['transforms']['type']  # 变换器名字
    trans_args = config['transforms']['args']  # 变换器参数
    transformation = getattr(data_module, trans_type)(trans_args)  # 对数据作变换

    train_data = data_manager.get_loader('train', transformation)  # 得到训练集
    val_data = data_manager.get_loader('val', transforms=None)  # 得到验证集

    model_name = config['model']  # 从json文件中获取模型名称
    model = ModelCalled(model_name, num_classes=num_classes)  # 召唤模型
    logger.info(model)  # 记录模型的信息

    # prepare for (multi-device) GPU training
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # if torch.cuda.is_available():  # check whether a GPU is available
    #     model = model.cuda()  # move the model onto the GPU

    loss = getattr(net_utils, config['loss'])  # get the loss function

    metrics = [getattr(net_utils, met)
               for met in config['metrics']]  # multi-class metrics need the number of classes

    trainable_params = filter(
        lambda p: p.requires_grad,
        model.parameters())  # keep only the parameters that require gradients

    optim_name = config['optimizer']['type']  # optimizer name
    optim_args = config['optimizer']['args']  # optimizer arguments
    optimizer = getattr(torch.optim, optim_name)(trainable_params,
                                                 **optim_args)

    lr_name = config['lr_scheduler']['type']  # lr scheduler name
    lr_args = config['lr_scheduler']['args']  # lr scheduler arguments

    if lr_name == 'None':
        lr_scheduler = None
    else:
        lr_scheduler = getattr(torch.optim.lr_scheduler, lr_name)(optimizer,
                                                                  **lr_args)

    trainer = Trainer(model=model,
                      loss=loss,
                      metrics=metrics,
                      optimizer=optimizer,
                      config=config,
                      data_loader=train_data,
                      valid_data_loader=val_data,
                      lr_scheduler=lr_scheduler,
                      device=device)
    trainer.train()
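
train_main reads everything through item access on the config object (which also provides get_logger, as used at the top of the function). A hypothetical config fragment covering the keys the function touches might look like the dict below; all type names and values are illustrative, not taken from the project, and setting the lr_scheduler type to the string 'None' disables the scheduler (see the check above).

# Hypothetical config covering the keys read by train_main (illustrative values only).
example_config = {
    'n_gpu': 1,
    'data_loader': {'csv_path': 'data/train.csv', 'batch_size': 32},
    'transforms': {'type': 'ImageTransforms', 'args': {'size': 224}},
    'model': 'resnet18',
    'loss': 'cross_entropy',
    'metrics': ['accuracy'],
    'optimizer': {'type': 'Adam', 'args': {'lr': 1e-3}},
    'lr_scheduler': {'type': 'StepLR', 'args': {'step_size': 30, 'gamma': 0.1}},
}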
Example #28
File: test.py  Project: ccfbupt/ATL-Net
def main(result_path, epoch_num):
    with open(os.path.join(result_path, 'config.json')) as f:
        config = json.load(f)

    fout_path = os.path.join(result_path, 'test_info.txt')
    fout_file = open(fout_path, 'a+')
    print_func(config, fout_file)

    trsfms = transforms.Compose([
        transforms.Resize((config['general']['image_size'],
                           config['general']['image_size'])),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),  # mean/std: normalization statistics assumed to be defined at module level
    ])

    model = ALTNet(**config['arch'])
    print_func(model, fout_file)

    state_dict = torch.load(
        os.path.join(result_path,
                     '{}_best_model.pth'.format(config['data_name'])))
    model.load_state_dict(state_dict)

    if config['train']['loss']['name'] == 'CrossEntropyLoss':
        criterion = nn.CrossEntropyLoss(**config['train']['loss']['args'])
    else:
        raise RuntimeError

    device, _ = prepare_device(config['n_gpu'])
    model = model.to(device)
    criterion = criterion.to(device)

    total_accuracy = 0.0
    total_h = np.zeros(epoch_num)
    total_accuracy_vector = []
    for epoch_idx in range(epoch_num):
        test_dataset = ImageFolder(
            data_root=config['general']['data_root'],
            mode='test',
            episode_num=600,
            way_num=config['general']['way_num'],
            shot_num=config['general']['shot_num'],
            query_num=config['general']['query_num'],
            transform=trsfms,
        )

        print_func('The num of the test_dataset: {}'.format(len(test_dataset)),
                   fout_file)

        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=config['test']['batch_size'],
            shuffle=True,
            num_workers=config['general']['workers_num'],
            drop_last=True,
            pin_memory=True)

        print_func('============ Testing on the test set ============',
                   fout_file)
        _, accuracies = validate(test_loader, model, criterion, epoch_idx,
                                 device, fout_file,
                                 config['general']['image2level'],
                                 config['general']['print_freq'])
        test_accuracy, h = mean_confidence_interval(accuracies)
        print_func("Test Accuracy: {}\t h: {}".format(test_accuracy, h[0]),
                   fout_file)

        total_accuracy += test_accuracy
        total_accuracy_vector.extend(accuracies)
        total_h[epoch_idx] = h

    aver_accuracy, _ = mean_confidence_interval(total_accuracy_vector)
    print_func(
        'Aver Accuracy: {:.3f}\t Aver h: {:.3f}'.format(
            aver_accuracy, total_h.mean()), fout_file)
    print_func('............Testing is finished............', fout_file)
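
mean_confidence_interval is not defined in this snippet. A common scipy-based sketch that returns the sample mean of the accuracies and the half-width of a 95% confidence interval is shown below; the project's version may instead return the half-width as a one-element array, which would explain the h[0] indexing above.

import numpy as np
import scipy.stats as st


def mean_confidence_interval(data, confidence=0.95):
    # Sample mean and confidence-interval half-width using Student's t
    # distribution (assumed implementation).
    a = np.asarray(data, dtype=np.float64)
    n = len(a)
    m = a.mean()
    h = st.sem(a) * st.t.ppf((1 + confidence) / 2.0, n - 1)
    return m, h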
Example #29
def train(config):
    # setup GPU device if available, move model into configured device
    device, device_ids = prepare_device(config["n_gpu"])

    # datasets
    train_datasets = dict()
    valid_datasets = dict()
    ## train
    keys = ["datasets", "train"]
    for name in get_by_path(config, keys):
        train_datasets[name] = config.init_obj([*keys, name])
    ## valid
    valid_exist = False
    keys = ["datasets", "valid"]
    for name in get_by_path(config, keys):
        valid_exist = True
        valid_datasets[name] = config.init_obj([*keys, name])
    ## compute inverse class frequency as class weight
    if config["datasets"].get("imbalanced", False):
        target = train_datasets["data"].y_train  # TODO
        class_weight = compute_class_weight(class_weight="balanced",
                                            classes=target.unique(),
                                            y=target)
        class_weight = torch.FloatTensor(class_weight).to(device)
    else:
        class_weight = None

    # losses
    losses = dict()
    for name in config["losses"]:
        kwargs = {}
        if "balanced" in get_by_path(config, ["losses", name, "type"]):
            kwargs.update(class_weight=class_weight)
        losses[name] = config.init_obj(["losses", name], **kwargs)

    # metrics
    metrics_iter = [
        getattr(module_metric, met)
        for met in config["metrics"]["per_iteration"]
    ]
    metrics_epoch = [
        getattr(module_metric, met) for met in config["metrics"]["per_epoch"]
    ]
    if "pick_threshold" in config["metrics"]:
        metrics_threshold = config.init_obj(["metrics", "pick_threshold"])
    else:
        metrics_threshold = None

    torch_objs = {
        "datasets": {
            "train": train_datasets,
            "valid": valid_datasets
        },
        "losses": losses,
        "metrics": {
            "iter": metrics_iter,
            "epoch": metrics_epoch,
            "threshold": metrics_threshold,
        },
    }

    k_fold = config["k_fold"]
    if k_fold > 1:  # cross validation enabled
        train_datasets["data"].split_cv_indexes(k_fold)

    results = pd.DataFrame()
    Cross_Valid.create_CV(k_fold)
    start = time.time()
    for k in range(k_fold):
        # data_loaders
        train_data_loaders = dict()
        valid_data_loaders = dict()
        ## train
        keys = ["data_loaders", "train"]
        for name in get_by_path(config, keys):
            kwargs = {}
            if "imbalanced" in get_by_path(config, [*keys, name, "module"]):
                kwargs.update(class_weight=class_weight.cpu().detach().numpy(),
                              target=target)
            dataset = train_datasets[name]
            loaders = config.init_obj([*keys, name], dataset, **kwargs)
            train_data_loaders[name] = loaders.train_loader
            if not valid_exist:
                valid_data_loaders[name] = loaders.valid_loader
        ## valid
        keys = ["data_loaders", "valid"]
        for name in get_by_path(config, keys):
            dataset = valid_datasets[name]
            loaders = config.init_obj([*keys, name], dataset)
            valid_data_loaders[name] = loaders.valid_loader

        # models
        models = dict()
        logger_model = get_logger("model", verbosity=1)
        for name in config["models"]:
            model = config.init_obj(["models", name])
            logger_model.info(model)
            model = model.to(device)
            if len(device_ids) > 1:
                model = torch.nn.DataParallel(model, device_ids=device_ids)
            models[name] = model

        # optimizers
        optimizers = dict()
        for name in config["optimizers"]:
            trainable_params = filter(lambda p: p.requires_grad,
                                      models[name].parameters())
            optimizers[name] = config.init_obj(["optimizers", name],
                                               trainable_params)

        # learning rate schedulers
        lr_schedulers = dict()
        for name in config["lr_schedulers"]:
            lr_schedulers[name] = config.init_obj(["lr_schedulers", name],
                                                  optimizers[name])

        torch_objs.update({
            "data_loaders": {
                "train": train_data_loaders,
                "valid": valid_data_loaders,
            },
            "models": models,
            "optimizers": optimizers,
            "lr_schedulers": lr_schedulers,
            "amp": None,
        })

        # amp
        if config["trainer"]["kwargs"]["apex"]:
            # TODO: revise here if multiple models and optimizers
            models["model"], optimizers["model"] = amp.initialize(
                models["model"], optimizers["model"], opt_level="O1")
            torch_objs["amp"] = amp

        trainer = config.init_obj(["trainer"], torch_objs, config.save_dir,
                                  config.resume, device)
        train_log = trainer.train()
        results = pd.concat((results, train_log), axis=1)

        # cross validation
        if k_fold > 1:
            Cross_Valid.next_fold()

    msg = msg_box("result")

    end = time.time()
    total_time = consuming_time(start, end)
    msg += f"\nConsuming time: {total_time}."

    result = pd.DataFrame()
    result["mean"] = results.mean(axis=1)
    result["std"] = results.std(axis=1)
    msg += f"\n{result}"

    logger.info(msg)  # 'logger' is assumed to be a module-level logger defined outside this snippet

    return result
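
get_by_path is used throughout this example to walk the nested config with a list of keys (e.g. get_by_path(config, ["datasets", "train"])). A minimal sketch consistent with that usage:

from functools import reduce
from operator import getitem


def get_by_path(root, keys):
    # Walk a nested mapping by a sequence of keys,
    # assumed equivalent to root[k1][k2]... for the given key list.
    return reduce(getitem, keys, root)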