Example #1
def main_train_worker(args):
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    print("=> creating model '{}'".format(args.arch))
    network = MetaLearnerModelBuilder.construct_cifar_model(args.arch, args.dataset)
    model_path = '{}/train_pytorch_model/real_image_model/{}@{}@epoch_{}@lr_{}@batch_{}.pth.tar'.format(
       PY_ROOT, args.dataset, args.arch, args.epochs, args.lr, args.batch_size)
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    print("after train, model will be saved to {}".format(model_path))
    network.cuda()
    image_classifier_loss = nn.CrossEntropyLoss().cuda()
    optimizer = RAdam(network.parameters(), args.lr, weight_decay=args.weight_decay)
    cudnn.benchmark = True
    train_loader = DataLoaderMaker.get_img_label_data_loader(args.dataset, args.batch_size, True)
    val_loader = DataLoaderMaker.get_img_label_data_loader(args.dataset, args.batch_size, False)

    for epoch in range(0, args.epochs):
        # adjust_learning_rate(optimizer, epoch, args)
        # train_simulate_grad_mode for one epoch
        train(train_loader, network, image_classifier_loss, optimizer, epoch, args)
        # evaluate_accuracy on validation set
        validate(val_loader, network, image_classifier_loss, args)
        # remember best acc@1 and save checkpoint
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': network.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, filename=model_path)
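All of the examples on this page construct RAdam the same way they would Adam: pass the model's parameter iterator, a learning rate, and optionally weight_decay. For reference, here is a minimal, self-contained sketch of that pattern using torch.optim.RAdam (bundled with PyTorch 1.10 and later); the repositories above import their own standalone RAdam implementation, and the toy model and dummy batch below are purely illustrative.

import torch
import torch.nn as nn

# Hypothetical toy classifier, used only to make the sketch runnable.
model = nn.Sequential(nn.Flatten(), nn.Linear(32 * 32 * 3, 10))
criterion = nn.CrossEntropyLoss()

# Same call shape as the example above: parameters, learning rate, weight decay.
optimizer = torch.optim.RAdam(model.parameters(), lr=1e-3, weight_decay=5e-4)

images = torch.randn(8, 3, 32, 32)        # dummy CIFAR-sized batch
labels = torch.randint(0, 10, (8,))       # dummy labels
loss = criterion(model(images), labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()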
Example #2
def main(args):
    mnasnet = models.mnasnet1_0(pretrained=True).to(device).eval()
    cvae = CVAE(1000, 128, args.n_class * 2, args.n_class).to(device)
    cvae.encoder.eval()
    regressor = Regressor().to(device)
    if Path(args.cvae_resume_model).exists():
        print("load cvae model:", args.cvae_resume_model)
        cvae.load_state_dict(torch.load(args.cvae_resume_model))

    if Path(args.regressor_resume_model).exists():
        print("load regressor model:", args.regressor_resume_model)
        regressor.load_state_dict(torch.load(args.regressor_resume_model))

    image_label = pandas.read_csv(
        Path(args.data_root, args.metadata_file_name.format(
            args.subset))).sample(frac=1, random_state=551)[:250]
    image_label["class"] = image_label["class"] - 1

    dataset = WBCDataset(args.n_class,
                         image_label[:250].values,
                         args.data_root,
                         subset=args.subset,
                         train=True)
    data_loader = loader(dataset, args.batch_size, True)
    cvae_optimizer = RAdam(cvae.parameters(), weight_decay=1e-3)
    regressor_optimizer = RAdam(regressor.parameters(), weight_decay=1e-3)
    train(args, mnasnet, cvae, regressor, cvae_optimizer, regressor_optimizer,
          data_loader)
Example #3
def main_train_worker(args):
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    if args.dataset.startswith("CIFAR"):
        compress_mode = 2
        use_tanh = False
        resize = None
        img_size = 32
    elif args.dataset == "ImageNet":
        compress_mode = 3
        use_tanh = True
        resize = 128
        img_size = 299
    elif args.dataset in ["MNIST", "FashionMNIST"]:
        compress_mode = 1
        use_tanh = False
        resize = None
        img_size = 28
    network = Codec(img_size,
                    IN_CHANNELS[args.dataset],
                    compress_mode,
                    resize=resize,
                    use_tanh=use_tanh)
    model_path = '{}/train_pytorch_model/AutoZOOM/AutoEncoder_{}@compress_{}@use_tanh_{}@epoch_{}@lr_{}@batch_{}.pth.tar'.format(
        PY_ROOT, args.dataset, compress_mode, use_tanh, args.epochs, args.lr,
        args.batch_size)
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    print("Model will be saved to {}".format(model_path))
    network.cuda()
    mse_loss_fn = nn.MSELoss().cuda()
    optimizer = RAdam(network.parameters(),
                      args.lr,
                      weight_decay=args.weight_decay)
    cudnn.benchmark = True
    train_loader = DataLoaderMaker.get_img_label_data_loader(
        args.dataset, args.batch_size, True, (img_size, img_size))
    # val_loader = DataLoaderMaker.get_img_label_data_loader(args.dataset, args.batch_size, False)

    for epoch in range(0, args.epochs):
        # adjust_learning_rate(optimizer, epoch, args)
        # train_simulate_grad_mode for one epoch
        train(train_loader, network, mse_loss_fn, optimizer, epoch, args,
              use_tanh)
        # evaluate_accuracy on validation set
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'encoder': network.encoder.state_dict(),
                'decoder': network.decoder.state_dict(),
                "compress_mode": compress_mode,
                "use_tanh": use_tanh,
                'optimizer': optimizer.state_dict(),
            },
            filename=model_path)
Example #4
def main(args):
    n_relational_embeddings = args.n_class**2
    n_tag_embeddings = args.n_class
    in_ch, out_ch = 1, 128
    model = TransNFCM(in_ch,
                      out_ch,
                      n_relational_embeddings,
                      n_tag_embeddings,
                      embedding_dim=128).to(device)

    if Path(args.resume_model).exists():
        print("load model:", args.resume_model)
        model.load_state_dict(torch.load(args.resume_model))

    optimizer = RAdam(model.parameters(), weight_decay=1e-3)

    train_dataset = FMNISTDataset(n_class=args.n_class, train=True)
    test_dataset = FMNISTDataset(n_class=args.n_class, train=False)

    train_loader = loader(train_dataset, args.batch_size)
    test_loader = loader(test_dataset, 1, shuffle=False)

    # train(args, model, optimizer, train_loader)
    test(args,
         model,
         test_loader,
         show_image_on_board=args.show_image_on_board,
         show_all_embedding=args.show_all_embedding)
Example #5
def main(args):
    model = EteWave(args.n_class).to(device)

    if Path(args.resume_model).exists():
        print("load model:", args.resume_model)
        model.load_state_dict(torch.load(args.resume_model))

    # setup optimizer
    optimizer = RAdam(model.parameters())

    train_data_file_names =\
        [line.rstrip() for line in open(args.train_data_file_pointer_path)]
    test_data_file_names =\
        [line.rstrip() for line in open(args.test_data_file_pointer_path)]

    train_dataset = ActivDataset(train_data_file_names,
                                 args.root_dir,
                                 seq_len=args.train_seq_len,
                                 time_step=args.time_step,
                                 is_train=True)
    test_dataset = ActivDataset(test_data_file_names,
                                args.root_dir,
                                seq_len=args.test_seq_len,
                                time_step=args.time_step,
                                is_train=False,
                                test_in_train=True)
    train_loader = loader(train_dataset, args.batch_size)
    test_loader = loader(test_dataset, 1, shuffle=False)

    train(args, model, optimizer, train_loader)
    test(args, model, test_loader)
Example #6
def main(args):
    n_relational_embeddings = args.n_class**2
    n_tag_embeddings = args.n_class
    in_ch, out_ch, emb_dim = 3, 128, 128
    model = TransNFCM(in_ch,
                      out_ch,
                      n_relational_embeddings,
                      n_tag_embeddings,
                      embedding_dim=emb_dim).to(device)

    optimizer = RAdam(model.parameters(), weight_decay=1e-3)

    image_label = pandas.read_csv(
        Path("gs://", args.bucket_name, args.data_root,
             args.metadata_file_name.format(args.subset)))
    image_label = image_label.sample(frac=1, random_state=551)
    image_label["class"] = image_label["class"] - 1
    image_label = image_label.values

    train_dataset = WBCDataset(args.n_class,
                               image_label[:250],
                               args.data_root,
                               project=args.project,
                               bucket_name=args.bucket_name,
                               subset=args.subset,
                               train=True)
    train_loader = loader(train_dataset, args.batch_size)
    train(args, model, optimizer, train_loader)
Example #7
def main(args):
    mnasnet1_0 = models.mnasnet1_0(pretrained=True).to(device).eval()
    model = CVAE(1000, 128, 128, args.n_class, 128).to(device)

    image_label = pandas.read_csv(
        Path(args.data_root, 
             args.metadata_file_name.format(args.subset))
    ).sample(frac=1, random_state=551)[:250]
    image_label["class"] = image_label["class"] - 1
    dataset = WBCDataset(image_label.values, args.data_root, subset=args.subset)

    data_loader = loader(dataset, args.batch_size, True)
    optimizer = RAdam(model.parameters(), weight_decay=1e-3)
    train(args, mnasnet1_0, model, optimizer, data_loader)
Example #8
def get_optimizer(params, train_weight, train_quant, train_bnbias, train_w_theta, train_a_theta):
    #global lr_quant
    (weight, quant, bnbias, theta_w, theta_a, skip) = params
    if args.optimizer.lower() == 'sgd':
        optimizer = optim.SGD([
            {'params': weight, 'weight_decay': args.decay, 'lr': args.lr  if train_weight else 0},
            {'params': quant, 'weight_decay': 0., 'lr': args.lr_quant if train_quant else 0},
            {'params': bnbias, 'weight_decay': 0., 'lr': args.lr_bn if train_bnbias else 0},
            {'params': theta_w, 'weight_decay': 0., 'lr': args.lr_w_theta if train_w_theta else 0},
            {'params': theta_a, 'weight_decay': 0., 'lr': args.lr_a_theta if train_a_theta else 0},
            {'params': skip, 'weight_decay': 0, 'lr': 0},
        ], momentum=args.momentum, nesterov=True)
    elif args.optimizer.lower() == 'radam':
        optimizer = RAdam([
            {'params': weight, 'weight_decay': args.decay, 'lr': args.lr  if train_weight else 0},
            {'params': quant, 'weight_decay': 0., 'lr': args.lr_quant if train_quant else 0},
            {'params': bnbias, 'weight_decay': 0., 'lr': args.lr_bn if train_bnbias else 0},
            {'params': theta_w, 'weight_decay': 0., 'lr': args.lr_w_theta if train_w_theta else 0},
            {'params': theta_a, 'weight_decay': 0., 'lr': args.lr_a_theta if train_a_theta else 0},
            {'params': skip, 'weight_decay': 0, 'lr': 0},
        ],)
    else:
        raise ValueError
    return optimizer
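Example #8 switches training phases by giving one optimizer several parameter groups and setting the learning rate of every inactive group to 0, so those parameters stay frozen without rebuilding the optimizer. Below is a minimal sketch of that idea under assumed names (a toy backbone/head split and illustrative phase flags), using torch.optim.RAdam rather than the project's local implementation.

import torch
import torch.nn as nn

# Hypothetical two-part model standing in for the (weight, quant, bnbias, ...) groups.
backbone = nn.Linear(16, 16)
head = nn.Linear(16, 4)

train_backbone, train_head = True, False   # illustrative phase flags

# Inactive groups get lr=0, which freezes them for this phase.
optimizer = torch.optim.RAdam([
    {'params': backbone.parameters(), 'weight_decay': 1e-4,
     'lr': 1e-3 if train_backbone else 0.0},
    {'params': head.parameters(), 'weight_decay': 0.0,
     'lr': 1e-2 if train_head else 0.0},
])

x = torch.randn(8, 16)
loss = head(backbone(x)).pow(2).mean()
optimizer.zero_grad()
loss.backward()
optimizer.step()   # only the backbone group is updated in this phase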
Example #9
def main(args):
    with mlflow.start_run():
        # Log our parameters into mlflow
        for key, value in vars(args).items():
            mlflow.log_param(key, value)

        n_tag_embeddings = args.n_class
        in_ch, out_ch = 1, 128
        model = TransNFCM(in_ch, out_ch, n_tag_embeddings,
                          embedding_dim=128).to(device)

        if Path(args.resume_model).exists():
            print("load model:", args.resume_model)
            model.load_state_dict(torch.load(args.resume_model))

        optimizer = RAdam(model.parameters(), weight_decay=1e-3)

        train_dataset = FMNISTDataset(n_class=args.n_class, train=True)
        test_dataset = FMNISTDataset(n_class=args.n_class, train=False)

        train_loader = loader(train_dataset, args.batch_size)
        test_loader = loader(test_dataset, 1, shuffle=False)

        if args.train:
            train(args, model, optimizer, train_loader)
        test(args,
             model,
             test_loader,
             show_image_on_board=args.show_image_on_board,
             show_all_embedding=args.show_all_embedding)

        # Upload the TensorBoard event logs as a run artifact
        print("Uploading TensorBoard events as a run artifact...")
        mlflow.log_artifacts(args.out_dir, artifact_path="events")
        print("\nLaunch TensorBoard with:\n\ntensorboard --logdir=%s" %
              Path(mlflow.get_artifact_uri(), "events"))
Example #10
    def get_bert_optimizer(self, opt, model):
        # Prepare optimizer and schedule (linear warmup and decay)
        no_decay = ['bias', 'LayerNorm.weight']
        diff_part = ["bert.embeddings", "bert.encoder"]

        if opt.diff_lr:
            logger.info("layered learning rate on")
            optimizer_grouped_parameters = [
                {
                    "params": [p for n, p in model.named_parameters()
                               if not any(nd in n for nd in no_decay)
                               and any(nd in n for nd in diff_part)],
                    "weight_decay": opt.weight_decay,
                    "lr": opt.bert_lr,
                },
                {
                    "params": [p for n, p in model.named_parameters()
                               if any(nd in n for nd in no_decay)
                               and any(nd in n for nd in diff_part)],
                    "weight_decay": 0.0,
                    "lr": opt.bert_lr,
                },
                {
                    "params": [p for n, p in model.named_parameters()
                               if not any(nd in n for nd in no_decay)
                               and not any(nd in n for nd in diff_part)],
                    "weight_decay": opt.weight_decay,
                    "lr": opt.layers_lr,
                },
                {
                    "params": [p for n, p in model.named_parameters()
                               if any(nd in n for nd in no_decay)
                               and not any(nd in n for nd in diff_part)],
                    "weight_decay": 0.0,
                    "lr": opt.layers_lr,
                },
            ]

            # Choose the optimizer
            if opt.optimizer == 'AdamW':
                optimizer = AdamW(optimizer_grouped_parameters,
                                  eps=opt.adam_epsilon)
                logger.info("Choose AdamW")
            elif opt.optimizer == 'RAdam':
                optimizer = RAdam(optimizer_grouped_parameters,
                                  eps=opt.adam_epsilon)
                logger.info("Choose RAdam")
            elif opt.optimizer == 'Ranger':
                optimizer = Ranger(optimizer_grouped_parameters,
                                   eps=opt.adam_epsilon)
                logger.info("Choose Ranger")
            else:
                logger.info("Please input correct optimizer!")
        else:
            logger.info("bert learning rate on")
            optimizer_grouped_parameters = [
                {
                    'params': [p for n, p in model.named_parameters()
                               if not any(nd in n for nd in no_decay)],
                    'weight_decay': opt.weight_decay,
                },
                {
                    'params': [p for n, p in model.named_parameters()
                               if any(nd in n for nd in no_decay)],
                    'weight_decay': 0.0,
                },
            ]
            # Choose the optimizer
            if opt.optimizer == 'AdamW':
                optimizer = AdamW(
                    optimizer_grouped_parameters,
                    lr=opt.bert_lr,
                    eps=opt.adam_epsilon)  #  weight_decay=opt.l2reg
                logger.info("Choose AdamW")
            elif opt.optimizer == 'RAdam':
                optimizer = RAdam(optimizer_grouped_parameters,
                                  lr=opt.bert_lr,
                                  eps=opt.adam_epsilon)
                logger.info("Choose RAdam")
            elif opt.optimizer == 'Ranger':
                optimizer = Ranger(optimizer_grouped_parameters,
                                   lr=opt.bert_lr,
                                   eps=opt.adam_epsilon)
                logger.info("Choose Ranger")
            else:
                logger.info("Please input correct optimizer!")

        return optimizer
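The grouping in Example #10 follows the usual transformer recipe: biases and LayerNorm weights are excluded from weight decay, and the BERT body can optionally get a different learning rate from the task layers. A compact sketch of just the no-decay split is shown below; the TinyEncoder module, its sizes, and the hyperparameter values are assumptions for illustration, and torch.optim.RAdam stands in for the project's RAdam/AdamW/Ranger choices.

import torch
import torch.nn as nn

# Hypothetical stand-in for a BERT-style module; the attribute is named
# "LayerNorm" so parameter names mirror the "...LayerNorm.weight" pattern
# that the no_decay substrings are matched against.
class TinyEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(8, 8)
        self.LayerNorm = nn.LayerNorm(8)

model = TinyEncoder()
no_decay = ('bias', 'LayerNorm.weight')

grouped = [
    {'params': [p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},               # decayed weights (illustrative value)
    {'params': [p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},                # biases and LayerNorm weights: no decay
]
optimizer = torch.optim.RAdam(grouped, lr=2e-5, eps=1e-8)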
Example #11
def main():
    if config.gpu and not torch.cuda.is_available():
        raise ValueError("GPU not supported or enabled on this system.")
    use_gpu = config.gpu

    log.info("Loading train dataset")
    train_dataset = COVIDxFolder(
        config.train_imgs, config.train_labels,
        transforms.train_transforms(config.width, config.height))
    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=True,
                              drop_last=True,
                              num_workers=config.n_threads,
                              pin_memory=use_gpu)
    log.info("Number of training examples {}".format(len(train_dataset)))

    log.info("Loading val dataset")
    val_dataset = COVIDxFolder(
        config.val_imgs, config.val_labels,
        transforms.val_transforms(config.width, config.height))
    val_loader = DataLoader(val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            num_workers=config.n_threads,
                            pin_memory=use_gpu)
    log.info("Number of validation examples {}".format(len(val_dataset)))

    if config.weights:
        state = torch.load(config.weights)
        log.info("Loaded model weights from: {}".format(config.weights))
    else:
        state = None

    state_dict = state["state_dict"] if state else None
    model = architecture.COVIDEfficientnet(n_classes=config.n_classes)
    if state_dict:
        model = util.load_model_weights(model=model, state_dict=state_dict)

    if use_gpu:
        model.cuda()
        model = torch.nn.DataParallel(model)
    optim_layers = filter(lambda p: p.requires_grad, model.parameters())

    # optimizer and lr scheduler
    optimizer = RAdam(optim_layers,
                      lr=config.lr,
                      weight_decay=config.weight_decay)
    scheduler = ReduceLROnPlateau(optimizer=optimizer,
                                  factor=config.lr_reduce_factor,
                                  patience=config.lr_reduce_patience,
                                  mode='max',
                                  min_lr=1e-7)

    # Load the last global_step from the checkpoint if existing
    global_step = 0 if state is None else state['global_step'] + 1

    class_weights = util.to_device(torch.FloatTensor(config.loss_weights),
                                   gpu=use_gpu)
    loss_fn = CrossEntropyLoss(weight=class_weights)

    # Reset the best metric score
    best_score = -1

    # Training
    for epoch in range(config.epochs):
        log.info("Started epoch {}/{}".format(epoch + 1, config.epochs))
        for data in train_loader:
            imgs, labels = data
            imgs = util.to_device(imgs, gpu=use_gpu)
            labels = util.to_device(labels, gpu=use_gpu)

            logits = model(imgs)
            loss = loss_fn(logits, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if global_step % config.log_steps == 0 and global_step > 0:
                probs = model.module.probability(logits)
                preds = torch.argmax(probs, dim=1).detach().cpu().numpy()
                labels = labels.cpu().detach().numpy()
                acc, f1, _, _ = util.clf_metrics(preds, labels)
                lr = util.get_learning_rate(optimizer)

                log.info("Step {} | TRAINING batch: Loss {:.4f} | F1 {:.4f} | "
                         "Accuracy {:.4f} | LR {:.2e}".format(
                             global_step, loss.item(), f1, acc, lr))

            if global_step % config.eval_steps == 0 and global_step > 0:
                best_score = validate(val_loader,
                                      model,
                                      best_score=best_score,
                                      global_step=global_step,
                                      cfg=config)
                scheduler.step(best_score)
            global_step += 1
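The loop above pairs RAdam with ReduceLROnPlateau in mode='max' and steps the scheduler on the best validation score rather than on the training loss, so the learning rate drops only when the monitored metric stops improving. Here is a self-contained sketch of that coupling; the toy model, hyperparameters, and the dummy score sequence are illustrative, not taken from the project.

import torch
import torch.nn as nn
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = nn.Linear(4, 2)                                    # toy stand-in
optimizer = torch.optim.RAdam(model.parameters(), lr=1e-4, weight_decay=1e-5)
# mode='max': reduce the LR when the monitored score stops increasing.
scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=2,
                              mode='max', min_lr=1e-7)

for val_f1 in [0.80, 0.82, 0.82, 0.81, 0.81]:              # dummy validation scores
    scheduler.step(val_f1)                                 # step on the metric, not the loss
print(optimizer.param_groups[0]['lr'])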
Example #12
class Optimizer(nn.Module):
    def __init__(self, model):
        super(Optimizer, self).__init__()
        self.setup_optimizer(model)

    def setup_optimizer(self, model):
        params = []
        for key, value in model.named_parameters():
            if not value.requires_grad:
                continue
            lr = cfg.SOLVER.BASE_LR
            weight_decay = cfg.SOLVER.WEIGHT_DECAY
            if "bias" in key:
                lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR
                weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS
            params += [{
                "params": [value],
                "lr": lr,
                "weight_decay": weight_decay
            }]

        if cfg.SOLVER.TYPE == 'SGD':
            self.optimizer = torch.optim.SGD(params,
                                             lr=cfg.SOLVER.BASE_LR,
                                             momentum=cfg.SOLVER.SGD.MOMENTUM)
        elif cfg.SOLVER.TYPE == 'ADAM':
            self.optimizer = torch.optim.Adam(params,
                                              lr=cfg.SOLVER.BASE_LR,
                                              betas=cfg.SOLVER.ADAM.BETAS,
                                              eps=cfg.SOLVER.ADAM.EPS)
        elif cfg.SOLVER.TYPE == 'ADAMAX':
            self.optimizer = torch.optim.Adamax(params,
                                                lr=cfg.SOLVER.BASE_LR,
                                                betas=cfg.SOLVER.ADAM.BETAS,
                                                eps=cfg.SOLVER.ADAM.EPS)
        elif cfg.SOLVER.TYPE == 'ADAGRAD':
            self.optimizer = torch.optim.Adagrad(params, lr=cfg.SOLVER.BASE_LR)
        elif cfg.SOLVER.TYPE == 'RMSPROP':
            self.optimizer = torch.optim.RMSprop(params, lr=cfg.SOLVER.BASE_LR)
        elif cfg.SOLVER.TYPE == 'RADAM':
            self.optimizer = RAdam(params,
                                   lr=cfg.SOLVER.BASE_LR,
                                   betas=cfg.SOLVER.ADAM.BETAS,
                                   eps=cfg.SOLVER.ADAM.EPS)
        else:
            raise NotImplementedError

        if cfg.SOLVER.LR_POLICY.TYPE == 'Fix':
            self.scheduler = None
        elif cfg.SOLVER.LR_POLICY.TYPE == 'Step':
            self.scheduler = torch.optim.lr_scheduler.StepLR(
                self.optimizer,
                step_size=cfg.SOLVER.LR_POLICY.STEP_SIZE,
                gamma=cfg.SOLVER.LR_POLICY.GAMMA)
        elif cfg.SOLVER.LR_POLICY.TYPE == 'Plateau':
            self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer,
                factor=cfg.SOLVER.LR_POLICY.PLATEAU_FACTOR,
                patience=cfg.SOLVER.LR_POLICY.PLATEAU_PATIENCE)
        elif cfg.SOLVER.LR_POLICY.TYPE == 'Noam':
            self.scheduler = lr_scheduler.create(
                'Noam',
                self.optimizer,
                model_size=cfg.SOLVER.LR_POLICY.MODEL_SIZE,
                factor=cfg.SOLVER.LR_POLICY.FACTOR,
                warmup=cfg.SOLVER.LR_POLICY.WARMUP)
        elif cfg.SOLVER.LR_POLICY.TYPE == 'MultiStep':
            self.scheduler = lr_scheduler.create(
                'MultiStep',
                self.optimizer,
                milestones=cfg.SOLVER.LR_POLICY.STEPS,
                gamma=cfg.SOLVER.LR_POLICY.GAMMA)
        else:
            raise NotImplementedError

    def zero_grad(self):
        self.optimizer.zero_grad()

    def step(self):
        self.optimizer.step()

    def scheduler_step(self, lrs_type, val=None):
        if self.scheduler is None:
            return

        if cfg.SOLVER.LR_POLICY.TYPE != 'Plateau':
            val = None

        if lrs_type == cfg.SOLVER.LR_POLICY.SETP_TYPE:
            self.scheduler.step(val)

    def get_lr(self):
        lr = []
        for param_group in self.optimizer.param_groups:
            lr.append(param_group['lr'])
        lr = sorted(list(set(lr)))
        return lr
Example #13
    def setup_optimizer(self, model):
        params = []
        for key, value in model.named_parameters():
            if not value.requires_grad:
                continue
            lr = cfg.SOLVER.BASE_LR
            weight_decay = cfg.SOLVER.WEIGHT_DECAY
            if "bias" in key:
                lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR
                weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS
            params += [{
                "params": [value],
                "lr": lr,
                "weight_decay": weight_decay
            }]

        if cfg.SOLVER.TYPE == 'SGD':
            self.optimizer = torch.optim.SGD(params,
                                             lr=cfg.SOLVER.BASE_LR,
                                             momentum=cfg.SOLVER.SGD.MOMENTUM)
        elif cfg.SOLVER.TYPE == 'ADAM':
            self.optimizer = torch.optim.Adam(params,
                                              lr=cfg.SOLVER.BASE_LR,
                                              betas=cfg.SOLVER.ADAM.BETAS,
                                              eps=cfg.SOLVER.ADAM.EPS)
        elif cfg.SOLVER.TYPE == 'ADAMAX':
            self.optimizer = torch.optim.Adamax(params,
                                                lr=cfg.SOLVER.BASE_LR,
                                                betas=cfg.SOLVER.ADAM.BETAS,
                                                eps=cfg.SOLVER.ADAM.EPS)
        elif cfg.SOLVER.TYPE == 'ADAGRAD':
            self.optimizer = torch.optim.Adagrad(params, lr=cfg.SOLVER.BASE_LR)
        elif cfg.SOLVER.TYPE == 'RMSPROP':
            self.optimizer = torch.optim.RMSprop(params, lr=cfg.SOLVER.BASE_LR)
        elif cfg.SOLVER.TYPE == 'RADAM':
            self.optimizer = RAdam(params,
                                   lr=cfg.SOLVER.BASE_LR,
                                   betas=cfg.SOLVER.ADAM.BETAS,
                                   eps=cfg.SOLVER.ADAM.EPS)
        else:
            raise NotImplementedError

        if cfg.SOLVER.LR_POLICY.TYPE == 'Fix':
            self.scheduler = None
        elif cfg.SOLVER.LR_POLICY.TYPE == 'Step':
            self.scheduler = torch.optim.lr_scheduler.StepLR(
                self.optimizer,
                step_size=cfg.SOLVER.LR_POLICY.STEP_SIZE,
                gamma=cfg.SOLVER.LR_POLICY.GAMMA)
        elif cfg.SOLVER.LR_POLICY.TYPE == 'Plateau':
            self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer,
                factor=cfg.SOLVER.LR_POLICY.PLATEAU_FACTOR,
                patience=cfg.SOLVER.LR_POLICY.PLATEAU_PATIENCE)
        elif cfg.SOLVER.LR_POLICY.TYPE == 'Noam':
            self.scheduler = lr_scheduler.create(
                'Noam',
                self.optimizer,
                model_size=cfg.SOLVER.LR_POLICY.MODEL_SIZE,
                factor=cfg.SOLVER.LR_POLICY.FACTOR,
                warmup=cfg.SOLVER.LR_POLICY.WARMUP)
        elif cfg.SOLVER.LR_POLICY.TYPE == 'MultiStep':
            self.scheduler = lr_scheduler.create(
                'MultiStep',
                self.optimizer,
                milestones=cfg.SOLVER.LR_POLICY.STEPS,
                gamma=cfg.SOLVER.LR_POLICY.GAMMA)
        else:
            raise NotImplementedError
Example #14
def base_runner():
    #######################################
    # Parse Arguments and Initialize W&B Logging
    #######################################
    print("-" * 100)
    cc = get_arguments()
    if cc.get('local_rank') == 0:
        wandb.init(project=PROJECT_NAME, dir=cc.get('log_dir'))
        wandb.config.update(cc)  # adds all of the arguments as config variables
        #print(f"W & B Log Dir:{wandb.wandb_dir()}")
    print("-" * 100)
    #######################################
    # Setting Dataset
    #######################################
    if cc.get('fp16'):
        torch.cuda.set_device(cc.get('local_rank'))
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
    train_loader, test_loader, train_sampler = ds.get_dataset(
        ds.Dataset[cc.get('dataset')],
        cc.get('data_dir'),
        batch_size=cc.get('batch_size'),
        num_workers=cc.get('num_workers'),
        distributed=cc.get('fp16'),
        enable_auto_augmentation='efficient' in cc.get('network_name'))
    loss = nn.CrossEntropyLoss()
    #######################################
    # Search Working Device
    #######################################
    working_device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    print(working_device.type)
    print("-" * 100)
    ######################################
    # Build Model and Load Pretrained Weights
    #####################################
    nc = networks.NetworkQuantizationController(
        quantization_config=get_q_config_same(
            list(
                range(cc.get('activation_min_bit_width'),
                      cc.get('activation_max_bit_width') + 1)),
            cc.get('bits_list'), cc.get('n_thresholds_shifts')),
        quantization_part=QUANTIZATION[cc.get('quantization_part')],
        ste=cc.get('gumbel_ste'))
    net = networks.get_network_function(cc.get('network_name'))(
        nc, pretrained=True)

    init_net(net, train_loader)

    net = update_quantization_coefficient(net)
    param_out_list, activation_scale_params, variable_scale_params = model_coefficient_split(
        net)
    ######################################
    # Build Optimizer and Loss function
    #####################################
    optimizer = RAdam([{
        'params': activation_scale_params,
        'lr': cc.get('lr_activation'),
        'weight_decay': 0.0
    }, {
        'params': variable_scale_params,
        'lr': cc.get('lr_coefficient'),
        'weight_decay': 0.0
    }])
    optimizer_net = RAdam([{
        'params': param_out_list,
        'lr': cc.get('lr_start'),
        'weight_decay': cc.get('weight_decay')
    }])
    net = net.to('cuda')
    #if cc.get('fp16'):
    #    net, optimizers = amp.initialize(net, [optimizer, optimizer_net],
    #                                     opt_level='O1',
    #                                     keep_batchnorm_fp32=None,
    #                                     loss_scale=None
    #                                     )
    optimizers = [optimizer, optimizer_net]
    net = common.multiple_gpu_enable(net, apex=cc.get('fp16'))
    ##################################
    # Initial accuracy evaluation
    ##################################
    #test_base_acc = common.accuracy_evaluation(net, test_loader, working_device)
    #print("Network Weight Loading Done with Accuracy:", test_base_acc)
    print('-' * 100)
    ######################################
    # Enable Quantization
    #####################################
    nc.apply_fix_point()
    #####################################
    # Search Max thresholds
    #####################################
    print("Initial thresholds", get_thresholds_list(nc, net)[0])
    nc.set_temperature(1)
    nc.enable_statistics_update()  # enable statistics collection
    train_acc = common.accuracy_evaluation(net, train_loader, working_device)
    nc.disable_statistics_update()  # disable statistics collection
    print("Initial Thresholds at the end of statistics update",
          get_thresholds_list(nc, net)[0], train_acc)
    #####################################
    # Retrain
    #####################################
    temp_func = common.get_exp_cycle_annealing(
        cc.get('cycle_size') * len(train_loader), cc.get('temp_step'),
        np.round(len(train_loader) / cc.get('n_gumbel')))
    gamma_target_func = common.get_step_annealing(cc.get('cycle_size'),
                                                  CR_START_W,
                                                  cc.get('target_compression'),
                                                  cc.get('n_target_steps'))
    gamma_target_func_activation = common.get_step_annealing(
        cc.get('cycle_size'), CR_START_A,
        cc.get('target_compression_activation'), cc.get('n_target_steps'))
    print("-" * 100)
    print("Starting Training")

    scaler = torch.cuda.amp.GradScaler()

    single_iteration_training_joint(net,
                                    cc,
                                    nc,
                                    train_loader,
                                    test_loader,
                                    optimizers,
                                    loss,
                                    temp_func,
                                    cc.get('gamma'),
                                    gamma_target_func,
                                    gamma_target_func_activation,
                                    working_device,
                                    amp_flag=cc.get('fp16'),
                                    train_sampler=train_sampler,
                                    gamma_rate=cc.get('gamma_rate'),
                                    scaler=scaler)
    final_stage_training(net,
                         cc,
                         nc,
                         train_loader,
                         test_loader, [optimizers[1]],
                         loss,
                         working_device,
                         cc.get('fp16'),
                         train_sampler,
                         scaler=scaler)
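Example #14 splits the network's parameters across two RAdam instances (quantization/scale coefficients vs. ordinary weights) and drives both inside the same training loop. The minimal sketch below shows that two-optimizer pattern over disjoint parameter sets; the toy module and learning rates are assumptions, and it omits the GradScaler and distributed pieces used above.

import torch
import torch.nn as nn

net = nn.Linear(8, 4)   # toy stand-in for the quantized network

# Two optimizers over disjoint parameter sets, each with its own LR / decay.
opt_scales = torch.optim.RAdam([net.bias], lr=1e-2, weight_decay=0.0)
opt_weights = torch.optim.RAdam([net.weight], lr=1e-3, weight_decay=1e-4)
optimizers = [opt_scales, opt_weights]

loss = net(torch.randn(2, 8)).pow(2).mean()
for opt in optimizers:
    opt.zero_grad()
loss.backward()
for opt in optimizers:
    opt.step()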
Example #15
)

valid_dataloader = DataLoader(
    valid_dataset,
    batch_size=int(config.batch_size // (config.valid_waveform_sec / config.waveform_sec)),
    shuffle=False,
    num_workers=config.num_workers,
    pin_memory=False,
)
# -------------------------------------/>

# Setting Optimizer
if config.optimizer == "adam":
    optimizer = optim.Adam(filter(lambda x: x.requires_grad, net.parameters()), lr=config.lr)
elif config.optimizer == "radam":
    optimizer = RAdam(filter(lambda x: x.requires_grad, net.parameters()), lr=config.lr)
else:
    raise NotImplementedError
# -------------------------------------/>

# Setting Scheduler
if config.lr_scheduler == "cosine":
    # restart every T_0 * validation_interval steps
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=20, eta_min=config.lr_min
    )
elif config.lr_scheduler == "plateau":
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="max", patience=5, factor=config.lr_decay
    )
elif config.lr_scheduler == "multi":