Example No. 1
def _init_params(self):
    self.net = get_net(self.config['model'], self.config['load_weights'])
    self.net.cuda()
    self.model = get_model(self.config['model'])
    self.criterion = get_loss(self.config['model'])
    self.optimizer = self._get_optim()
    self.scheduler = optim.lr_scheduler.MultiStepLR(
        self.optimizer, milestones=[40, 55, 70, 95], gamma=0.5)
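For context, `MultiStepLR` multiplies the learning rate by `gamma` at each listed milestone, so this schedule halves the LR after epochs 40, 55, 70 and 95, assuming the surrounding trainer calls `scheduler.step()` once per epoch. A minimal self-contained sketch of that behavior:

    import torch.nn as nn
    import torch.optim as optim

    # Standalone illustration of the schedule configured above.
    net = nn.Linear(10, 2)
    opt = optim.SGD(net.parameters(), lr=0.1)
    sched = optim.lr_scheduler.MultiStepLR(opt, milestones=[40, 55, 70, 95],
                                           gamma=0.5)
    for epoch in range(100):
        # ... one training epoch ...
        sched.step()  # lr: 0.1 -> 0.05 after epoch 40 -> 0.025 after 55 -> ...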
Example No. 2
def _init_params(self):
    self.criterionG, criterionD = get_loss(self.config['model'])
    self.netG, netD = get_nets(self.config['model'])
    self.netG.to(self.device)
    self.adv_trainer = self._get_adversarial_trainer(
        self.config['model']['d_name'], netD, criterionD)
    self.model = get_model(self.config['model'])
    self.optimizer_G = self._get_optim(
        filter(lambda p: p.requires_grad, self.netG.parameters()))
    self.optimizer_D = self._get_optim(self.adv_trainer.get_params())
    self.scheduler_G = self._get_scheduler(self.optimizer_G)
    self.scheduler_D = self._get_scheduler(self.optimizer_D)
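The paired G/D optimizers and schedulers imply the standard alternating GAN update. A generic sketch of that loop follows; the loss calls and the loader are placeholders, not this trainer's actual API:

    # Alternating update implied by the paired optimizers (placeholder losses).
    for x, y in loader:                                   # hypothetical loader
        optimizer_D.zero_grad()
        loss_D = criterion_D(netD(netG(x).detach()), netD(y))  # placeholder
        loss_D.backward()
        optimizer_D.step()

        optimizer_G.zero_grad()
        loss_G = criterion_G(netD(netG(x)), y)                 # placeholder
        loss_G.backward()
        optimizer_G.step()
    scheduler_G.step()  # schedulers are typically stepped once per epoch
    scheduler_D.step()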
Example No. 3
def _init_params(self):
    self.criterionG, criterionD = get_loss(self.config['model'])
    self.netG, netD = get_nets(self.config['model'])
    self.netG.cuda()
    self.adv_trainer = self._get_adversarial_trainer(
        self.config['model']['d_name'], netD, criterionD)
    self.model = get_model(self.config['model'])
    self.optimizer_G = self._get_optim(
        filter(lambda p: p.requires_grad, self.netG.parameters()))
    self.optimizer_D = self._get_optim(self.adv_trainer.get_params())
    self.scheduler_G = self._get_scheduler(self.optimizer_G)
    self.scheduler_D = self._get_scheduler(self.optimizer_D)
    # warm-start the generator from a pretrained checkpoint
    self.netG.load_state_dict(
        torch.load("best_fpn.h5", map_location='cpu')['model'])
Example No. 4
    def _init_params(self):
        self.criterionG, criterionD = get_loss(self.config['model'])
        netG, netD = get_nets(self.config['model'])
        model = netG.cuda()

        ############ load saved model, resume training ############
        checkpoint = torch.load('best_{}.h5'.format(
            self.config['experiment_desc']))
        model.load_state_dict(checkpoint['model'])
        self.netG = model

        self.adv_trainer = self._get_adversarial_trainer(
            self.config['model']['d_name'], netD, criterionD)
        self.model = get_model(self.config['model'])
        self.optimizer_G = self._get_optim(
            filter(lambda p: p.requires_grad, self.netG.parameters()))
        self.optimizer_D = self._get_optim(self.adv_trainer.get_params())
        self.scheduler_G = self._get_scheduler(self.optimizer_G)
        self.scheduler_D = self._get_scheduler(self.optimizer_D)
Example No. 5
def main():
    global args, use_gpu, writer, rank, logger, best_top1, world_size
    args = parser.parse_args()
    with open(args.config) as f:
        config = yaml.safe_load(f)  # safe_load avoids arbitrary object construction

    #######  visualize configs ######
    visualize_configurations(config)
    #######  set args ######
    for key in config:
        for k, v in config[key].items():
            setattr(args, k, v)
    if args.verbose:
        print('Config parsing complete')

    #######  world initial ######
    if args.distributed:
        rank, world_size = dist.dist_init(args.port, 'nccl')
        if rank == 0:
            tbpath = os.path.join(args.logpath, 'tb', args.task_name)
            os.makedirs(tbpath, exist_ok=True)
            writer = SummaryWriter(log_dir=tbpath)
            writer.add_text('config_information', transfer_txt(args))
            logger = loggers.get_logger(
                os.path.join(args.logpath,
                             '{}.distlog'.format(args.task_name)))
            logger.info("Logger is set")
            logger.info("Logger with distribution")
    else:
        tbpath = os.path.join(args.logpath, 'tb', args.task_name)
        os.makedirs(tbpath, exist_ok=True)
        writer = SummaryWriter(log_dir=tbpath)
        writer.add_text('config_information', transfer_txt(args))
        logger = loggers.get_logger(
            os.path.join(args.logpath, '{}.log'.format(args.task_name)))
        logger.info("Logger is set")
        logger.info("Logger without distribution")

    ######## initial random setting #######

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    torch.backends.cudnn.benchmark = True

    ######## test data reading ########

    since = time.time()
    dataset_train_val = base_dataset.baseline_dataset(args)
    train_loader, val_loader = dataset_train_val.get_loader()
    logger.info("Dataset initialization took {:.2f} seconds".format(
        time.time() - since))

    logger.info("Number of training classes: {}".format(
        len(dataset_train_val.train_classnames)))
    since = time.time()
    inputs, classes = next(iter(train_loader))
    logger.info('Loading one example batch took {:.3f} seconds'.format(
        time.time() - since))

    ######### Init model ############
    if args.model_name == 'resnet50_middle':
        model = baseline_cls.resnet50_middle(
            len(dataset_train_val.train_classnames),
            droprate=args.dropoutrate,
            pretrain=args.pretrain,
            return_f=args.reture_bottleneck_feature,
            return_mid=args.return_middle_level_feature)
    else:
        model = baseline_cls.PCB(len(dataset_train_val.train_classnames))

    #logger.info(model)
    if args.PCB:
        model = baseline_cls.PCB(len(dataset_train_val.train_classnames))

    ########## launch training ###########

    woptimizer = optimizers.get_optimizer(args, model)
    lr_schedular = optimizers.get_lr_scheduler(args, woptimizer)
    criterion = losses.get_loss(args)

    if args.resume != '' and os.path.isfile(args.resume):
        if args.distributed:
            if rank == 0:
                print('resume from [%s]' % args.resume)
            checkpoint = torch.load(args.resume,
                                    map_location='cuda:%d' %
                                    torch.cuda.current_device())
        else:
            print('resume from [%s]' % args.resume)
            checkpoint = torch.load(args.resume, map_location="cpu")

        model.load_state_dict(checkpoint['network'])
        #woptimizer.load_state_dict(checkpoint['optimizer'])
        #lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        epoch_offset = checkpoint['epoch']
    else:
        epoch_offset = 0

    model.to(device)
    if args.distributed:
        dist.sync_state(model)

    if args.fp16:
        model, woptimizer = amp.initialize(model, woptimizer, opt_level="O1")
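        # NOTE: this uses NVIDIA apex's amp; recent PyTorch versions provide
        # native mixed precision via torch.cuda.amp instead.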

    for epoch in range(epoch_offset, args.epochs):

        # train
        train(args,
              train_loader,
              val_loader,
              model,
              woptimizer,
              lr_schedular,
              epoch=epoch,
              criterion=criterion)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(args,
                        val_loader,
                        model,
                        epoch=epoch,
                        cur_step=cur_step,
                        criterion=criterion)

        if (not args.distributed) or rank == 0:
            if best_top1 < top1:
                best_top1 = top1
                save_network(args, model, epoch, top1, isbest=True)
            elif epoch % args.forcesave == 0:
                save_network(args, model, epoch, top1)
            writer.add_scalar('val/best_top1', best_top1, cur_step)

        if (not args.distributed) or rank == 0:
            logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
            #logger.info("Best Genotype = {}".format(best_genotype))
Example No. 6
def main():
    global args, use_gpu, writer, rank, logger, best_top1, world_size
    args = parser.parse_args()
    with open(args.config) as f:
        config = yaml.safe_load(f)  # safe_load avoids arbitrary object construction

    #######  visualize configs ######
    visualize_configurations(config)
    #######  set args ######
    for key in config:
        for k, v in config[key].items():
            setattr(args, k, v)
    if args.verbose:
        print('Config parsing complete')

    #######  world initial ######
    if args.distributed:
        rank, world_size = dist.dist_init(args.port, 'nccl')
        logger = loggers.get_logger(
            os.path.join(args.logpath, '{}.distlog'.format(args.task_name)))
        if rank == 0:
            tbpath = os.path.join(args.logpath, 'tb', args.task_name)
            os.makedirs(tbpath, exist_ok=True)
            writer = SummaryWriter(log_dir=tbpath)
            writer.add_text('config_information', transfer_txt(args))

            logger.info("Logger is set")
            logger.info("Logger with distribution")
    else:
        tbpath = os.path.join(args.logpath, 'tb', args.task_name)
        os.makedirs(tbpath, exist_ok=True)
        writer = SummaryWriter(log_dir=tbpath)
        writer.add_text('config_information', transfer_txt(args))
        logger = loggers.get_logger(
            os.path.join(args.logpath, '{}.log'.format(args.task_name)))
        logger.info("Logger is set")
        logger.info("Logger without distribution")

    ######## initial random setting #######

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    torch.backends.cudnn.benchmark = True

    ######## test data reading ########

    since = time.time()
    dataset_train_val = base_dataset.baseline_dataset(args)
    train_loader, val_loader = dataset_train_val.get_loader()
    logger.info("Dataset initialization took {:.2f} seconds".format(
        time.time() - since))

    logger.info("Number of training classes: {}".format(
        len(dataset_train_val.train_classnames)))
    since = time.time()
    inputs, classes = next(iter(train_loader))
    logger.info('Loading one example batch took {:.3f} seconds'.format(
        time.time() - since))

    ######### Init model ############
    #woptimizer =  optimizers.get_optimizer(args, model)
    #lr_schedular = optimizers.get_lr_scheduler(args, woptimizer)
    criterion = losses.get_loss(args)

    criterion.to(device)

    if args.model_name == 'Darts_normal':
        model = SearchCNNController(args.input_channels, args.init_channels,
                                    len(dataset_train_val.train_classnames),
                                    args.Search_layers, criterion)
    else:
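        # NOTE: this fallback currently builds the same SearchCNNController
        # as the branch above; presumably other model names were planned.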
        model = SearchCNNController(args.input_channels, args.init_channels,
                                    len(dataset_train_val.train_classnames),
                                    args.Search_layers, criterion)

    model = model.to(device)
    if args.distributed:
        dist.sync_state(model)

    w_optim = torch.optim.SGD(model.weights(),
                              args.w_lr,
                              momentum=args.w_momentum,
                              weight_decay=args.w_weight_decay)

    alpha_optim = torch.optim.Adam(model.alphas(),
                                   args.alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=args.alpha_weight_decay)

    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, args.epochs, eta_min=args.w_lr_min)
    architect = Architect(model, args.w_momentum, args.w_weight_decay, args)

    ########## launch training ###########

    if args.resume != '' and os.path.isfile(args.resume):
        if args.distributed:
            if rank == 0:
                print('resume from [%s]' % args.resume)
            checkpoint = torch.load(args.resume,
                                    map_location='cuda:%d' %
                                    torch.cuda.current_device())
        else:
            print('resume from [%s]' % args.resume)
            checkpoint = torch.load(args.resume, map_location="cpu")

        model.load_state_dict(checkpoint['network'])
        #woptimizer.load_state_dict(checkpoint['optimizer'])
        #lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        epoch_offset = checkpoint['epoch']
    else:
        epoch_offset = 0

    model.to(device)

    if args.fp16:
        model, w_optim = amp.initialize(model, w_optim, opt_level="O1")

    for epoch in range(epoch_offset, args.epochs):
        if args.distributed:
            if rank == 0:
                model.print_alphas(logger)
        else:
            model.print_alphas(logger)

        # train
        if epoch % args.real_val_freq == 0:
            train(args,
                  train_loader,
                  val_loader,
                  model,
                  architect,
                  w_optim,
                  alpha_optim,
                  lr_scheduler,
                  epoch=epoch)
        else:
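            # Off real-validation epochs, the training loader is reused in
            # place of the validation loader.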
            train(args,
                  train_loader,
                  train_loader,
                  model,
                  architect,
                  w_optim,
                  alpha_optim,
                  lr_scheduler,
                  epoch=epoch)
        # validation
        cur_step = (epoch + 1) * len(train_loader)

        top1 = validate(args,
                        val_loader,
                        model,
                        epoch=epoch,
                        cur_step=cur_step)

        if (not args.distributed) or rank == 0:
            if best_top1 < top1:
                best_top1 = top1
                save_network(args, model, epoch, top1, isbest=True)
            elif epoch % args.forcesave == 0:
                save_network(args, model, epoch, top1)
            writer.add_scalar('val/best_top1', best_top1, cur_step)

        if (not args.distributed) or rank == 0:
            logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
            #logger.info("Best Genotype = {}".format(best_genotype))

        genotype = model.genotype()

        if (not args.distributed) or rank == 0:
            logger.info("genotype = {}".format(genotype))

            if args.plot_path:
                plot_dir = os.path.join(args.plot_path, args.task_name)
                os.makedirs(plot_dir, exist_ok=True)
                plot_path = os.path.join(plot_dir,
                                         "EP{:02d}".format(epoch + 1))
                caption = "Epoch {}".format(epoch + 1)
                plot(genotype.normal, plot_path + "-normal", caption)
                plot(genotype.reduce, plot_path + "-reduce", caption)

                # SummaryWriter.add_image expects (tag, img_tensor); the
                # original passed only a filename. Assuming plot() renders
                # PNG files at these paths (and `from PIL import Image`
                # plus numpy are available), read one back before logging:
                img = np.array(Image.open(plot_path + "-normal.png"))
                writer.add_image('genotype/normal', img, cur_step,
                                 dataformats='HWC')
Example No. 7
    def train(self, x_train: np.ndarray, y_train: np.ndarray):
        """ Train a BNN using input datapoints `x_train` with corresponding labels `y_train`.
        Parameters
        ----------
        x_train : numpy.ndarray (N, D)
            Input training datapoints.
        y_train : numpy.ndarray (N,)
            Input training labels.
        """
        logging.debug("Training started.")

        logging.debug("Clearing list of sampled weights.")
        self.sampled_weights.clear()

        num_datapoints, input_dimensionality = x_train.shape
        output_dimensionality = 1
        if y_train.ndim > 1:
            _, output_dimensionality = y_train.shape

        logging.debug(
            "Processing %d training datapoints "
            "with %d dimensions each." % (num_datapoints, input_dimensionality)
        )

        x_train_ = np.asarray(x_train)

        if self.normalize_input:
            logging.debug(
                "Normalizing training datapoints to "
                "zero mean and unit variance."
            )
            x_train_, self.x_mean, self.x_std = zero_mean_unit_var_normalization(x_train)

        y_train_ = np.asarray(y_train)

        if self.normalize_output:
            logging.debug("Normalizing training labels to zero mean and unit variance.")
            y_train_, self.y_mean, self.y_std = zero_mean_unit_var_normalization(y_train)

        train_loader = infinite_dataloader(
            data_utils.DataLoader(
                data_utils.TensorDataset(
                    torch.from_numpy(x_train_).float().type(dtype),
                    torch.from_numpy(y_train_).float().type(dtype)
                ),
                batch_size=self.batch_size
            )
        )

        try:
            architecture_name = self.network_architecture.__name__
        except AttributeError:
            architecture_name = str(self.network_architecture)
        logging.debug("Using network architecture: %s" % architecture_name)

        if output_dimensionality == 1:
            self.model = self.network_architecture(
                input_dimensionality=input_dimensionality
            )
        else:
            self.model = self.network_architecture(
                input_dimensionality=input_dimensionality,
                output_dimensionality=output_dimensionality
            )

        try:
            optimizer_name = self.optimizer.__name__
        except AttributeError:
            optimizer_name = str(self.optimizer)

        logging.debug("Using optimizer: %s" % optimizer_name)

        optimizer = get_optimizer(
            optimizer_cls=self.optimizer,
            parameters=self.model.parameters(),
            num_datapoints=num_datapoints,
            **self.optimizer_kwargs
        )

        loss_function = get_loss(
            self.loss, parameters=self.model.parameters(),
            num_datapoints=num_datapoints, size_average=True
        )

        if self.use_progressbar:
            logging.info(
                "Progress bar enabled. To disable pass "
                "`logging_configuration={level: debug.WARN}`."
            )

            losses = OrderedDict(((get_name(self.loss), loss_function),))
            losses.update(
                (get_name(metric), to_bayesian_loss(metric)())
                for metric in self.metrics
            )

            batch_generator = TrainingProgressbar(
                iterable=islice(enumerate(train_loader), self.num_steps),
                losses=losses,
                total=self.num_steps,
                bar_format="{n_fmt}/{total_fmt}[{bar}] - {remaining} - {postfix}"
            )
        else:
            batch_generator = islice(enumerate(train_loader), self.num_steps)

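        # NOTE: `epoch` below is the global step index yielded by enumerate(),
        # not a pass over the full dataset.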
        for epoch, (x_batch, y_batch) in batch_generator:
            optimizer.zero_grad()
            loss = loss_function(input=self.model(x_batch), target=y_batch)
            loss.backward()
            optimizer.step()

            if self.use_progressbar:
                predictions = self.model(x_batch)
                batch_generator.update(
                    predictions=predictions, y_batch=y_batch, epoch=epoch
                )

            if self._keep_sample(epoch):
                logging.debug("Recording sample, epoch = %d " % (epoch))
                weights = self.network_weights
                logging.debug("Sampled weights:\n%s" % str(weights))
                self.sampled_weights.append(weights)

        self.is_trained = True
        return self
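A minimal usage sketch for this API, assuming a class (here hypothetically named `BayesianNeuralNetwork`) that exposes the `train` method above; the constructor arguments are assumptions, the array shapes follow the docstring:

    import numpy as np

    # Hypothetical instantiation; the class name and kwargs are assumptions.
    model = BayesianNeuralNetwork(normalize_input=True, normalize_output=True)

    x_train = np.random.rand(128, 4)        # N=128 datapoints, D=4 each
    y_train = np.sin(x_train).sum(axis=1)   # shape (N,) regression targets
    model.train(x_train, y_train)           # returns self, per the method above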
Example No. 8
    # NOTE: this snippet begins mid-expression; `composed` is the tail of a
    # transforms.Compose([...]) pipeline whose opening lines were truncated.
                                   OneToThreeDimension(),
                                   ToTensor(),
                                   Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225]),
                                  ])
    acdc_dataset = {x: ACDCDataset(train_args["pos_samps_" + x],
                                   train_args["neg_samps_" + x],
                                   transform=composed)
                    for x in ["train", "val", "test"]}

    dataloader = {x: DataLoader(acdc_dataset[x],
                                batch_size=train_args["batch_size"],
                                shuffle=True, num_workers=4,
                                # sampler=sampler[x]
                                )
                  for x in ["train", "val", "test"]}
    dataset_sizes = {x: len(acdc_dataset[x]) for x in ["train", "val", "test"]}

    model_ft = get_model(train_args["model"], device,
                         pretrained=train_args["pretrained"])

    criterion = get_loss(train_args["loss_name"])

    optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-5)

    model_ft = train(model_ft, criterion, optimizer_ft,
                     num_epochs=train_args["epoch"])

    test(model_ft, dataloader["test"], dataset_sizes["test"])
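The ImageNet-style Normalize above expects three channels, which suggests `OneToThreeDimension` replicates a single-channel image across three channels. A plausible implementation, purely an assumption about this codebase:

    import numpy as np

    class OneToThreeDimension:
        """Replicate a 1-channel image to 3 channels (assumed behavior)."""
        def __call__(self, img):
            arr = np.asarray(img)
            if arr.ndim == 2:                      # (H, W) -> (H, W, 3)
                arr = np.stack([arr] * 3, axis=-1)
            return arr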