Example #1
def get_model(arch, wts_path):
    if arch == 'alexnet':
        model = AlexNet()
        model.fc = nn.Sequential()
        load_weights(model, wts_path)
    elif arch == 'pt_alexnet':
        model = models.alexnet()
        # keep only the first five classifier layers, dropping the final ReLU and 1000-way head
        classif = list(model.classifier.children())[:5]
        model.classifier = nn.Sequential(*classif)
        load_weights(model, wts_path)
    elif arch == 'mobilenet':
        model = MobileNetV2()
        model.fc = nn.Sequential()
        load_weights(model, wts_path)
    elif 'resnet' in arch:
        model = models.__dict__[arch]()
        model.fc = nn.Sequential()
        load_weights(model, wts_path)
    else:
        raise ValueError('arch not found: ' + arch)

    # freeze everything: the returned model is a fixed feature extractor
    for p in model.parameters():
        p.requires_grad = False

    return model
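
Since fc (or the tail of classifier) is replaced by an empty nn.Sequential and every parameter is frozen, the returned model acts as a fixed feature extractor. A minimal usage sketch, assuming a ResNet-50 checkpoint (the weight path is hypothetical):

import torch
import torch.nn as nn

backbone = get_model('resnet50', 'weights/encoder.pth')  # hypothetical path
backbone.eval()  # keep BatchNorm statistics fixed during extraction
with torch.no_grad():
    feats = backbone(torch.randn(2, 3, 224, 224))  # -> (2, 2048) features
probe = nn.Linear(feats.shape[1], 1000)  # trainable linear probe on frozen features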
Example #2
def train(train_dataset, val_dataset, configs):

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=configs["batch_size"],
        shuffle=True,
    )

    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=configs["batch_size"],
        shuffle=False,
    )

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = AlexNet().to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=configs["lr"])

    for epoch in range(configs["epochs"]):

        model.train()
        running_loss = 0.0
        correct = 0

        # wrap the loader (not the enumerator) so tqdm knows the total batch count
        for i, (inputs, labels) in enumerate(tqdm(train_loader)):

            inputs, labels = inputs.to(device), labels.squeeze().to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == labels).sum().item()

            running_loss += loss.item()

        print("[%d] loss: %.4f" %
                  (epoch + 1, running_loss / train_dataset.__len__()))

        model.eval()
        correct = 0

        with torch.no_grad():

            for i, (inputs, labels) in enumerate(tqdm(val_loader)):

                inputs, labels = inputs.to(device), labels.squeeze().to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                correct += (predicted == labels).sum().item()

        print("Accuracy of the network on the %d test images: %.4f %%" %
                (val_dataset.__len__(), 100. * correct / val_dataset.__len__()))

    torch.save(model.state_dict(), "/opt/output/model.pt")
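
configs only needs the three keys the function reads: batch_size, lr, and epochs. A minimal smoke-test sketch with dummy tensors (labels are shaped (N, 1) so that the labels.squeeze() call above applies; /opt/output must exist for the final save, and AlexNet is assumed to accept 3x224x224 inputs):

import torch
from torch.utils.data import TensorDataset

images = torch.randn(16, 3, 224, 224)   # dummy inputs, illustration only
labels = torch.randint(0, 10, (16, 1))  # dummy integer class labels
dummy = TensorDataset(images, labels)

configs = {"batch_size": 4, "lr": 1e-4, "epochs": 1}
train(dummy, dummy, configs)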
Example #3
def jobSetup():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    done = False  # set True when the user is finished queueing jobs (avoids shadowing the exit builtin)
    joblist = []
    while (not done):
        # These booleans control the state of the menu
        SessionTypeBool = True
        ModelTypeBool = True
        EpochBool = True
        TrainBatchBool = True
        OptimBool = True
        TestBatchBool = True
        jobBool = True

        #--------------------------------------Model Selection--------------------------------------#
        while (ModelTypeBool):
            modeltype = input(
                " a.Alexnet \n b.VGG16  \n c.ResNext  \n d.VGGv2\n   >")
            if (modeltype != 'a' and modeltype != 'b' and modeltype != 'c'
                    and modeltype != 'd'):
                print("Please input a valid model input")
                ModelTypeBool = True

            if (modeltype == 'a'):
                model = AlexNet()
                modeldict = 'Alexnet-model.pt'
                modelname = "Alexnet"
                valtrain = 32
                valtest = 136
                optimizer = optim.Adam(model.parameters(), lr=0.001)
                ModelTypeBool = False

            elif (modeltype == 'b'):
                model = VGG16()
                modeldict = 'VGG16-model.pt'
                modelname = "VGG16"
                valtrain = 32
                valtest = 136
                optimizer = optim.SGD(model.parameters(), lr=0.001)
                ModelTypeBool = False

            elif (modeltype == 'c'):
                model = resnext50_32x4d()
                modeldict = 'ResNext50-model.pt'
                modelname = "ResNext50"
                valtrain = 32
                valtest = 136
                optimizer = optim.Adam(model.parameters(), lr=0.001)
                ModelTypeBool = False

            elif (modeltype == 'd'):
                model = VGG_v2()
                modeldict = 'VGGv2-model.pt'
                modelname = "VGGv2"
                valtrain = 32
                valtest = 136
                optimizer = optim.Adam(model.parameters(), lr=0.001)
                ModelTypeBool = False

        print(modelname + ": chosen")

        #------------------------------------Session Selection--------------------------------------#
        while (SessionTypeBool):
            sessiontype = input(
                " a.Start Training a new model \n b.Test the model \n   >")
            if (sessiontype != 'a' and sessiontype != 'b'):
                print("Please enter a valid session choice")
                SessionTypeBool = True
            if (sessiontype == 'a'):
                SessionTypeBool = False
                print("From Scratch: chosen")
            elif (sessiontype == 'b'):
                SessionTypeBool = False
                TrainBatchBool = False
                OptimBool = False
                EpochBool = False
                valtrain = 1
                epochval = 1
                print("Testing: chosen")
        # UNCOMMENT TO RE-ENABLE THE CONTINUE-TRAINING OPTION. Use at your own risk!
            """
         elif (sessiontype == 'c'):
            SessionTypeBool = False
            print ("Continue Training: chosen")
         """
        #------------------------------------Epoch Selection--------------------------------------#
        while (EpochBool):
            epoch = input(" Number of Epochs:   ")
            try:
                epochval = int(epoch)
                print(f'\nEpochs chosen: {epochval}')
                EpochBool = False
            except ValueError:
                print("Please input a valid Epochs input")
                EpochBool = True

        # This section is DEVELOPER USE ONLY. We do not want the user to change the training or
        # test batch sizes, as this can lead to CUDA out-of-memory errors. Uncomment and use at your own risk!
        """
      #------------------------------------Optimiser Selection---------------------------------#
      while (OptimBool):
         optimiseinput = input(" Optimizer (Debug): \n a.Adam \n b.SGD  \n   >") 
         if (optimiseinput != 'a' and optimiseinput != 'b'):
            print ("Please input a valid Optimizer input")
            OptimBool = True
         if (optimiseinput == 'a'):  
            optimizer = optim.Adam(model.parameters(), lr=0.001)
            print ("Adam chosen")
            OptimBool = False
         elif (optimiseinput == 'b'):
            optimizer = optim.SGD(model.parameters(), lr=0.001)
            print ("SGD chosen")
            OptimBool = False
      #------------------------------------Batch Selection---------------------------------#
      while (TrainBatchBool):
         trainbatch = input(" Number of train batchs (Debug):   ")
         try:
            valtrain = int(trainbatch)
            print(f'\ntraining batchs chosen: {valtrain}')
            TrainBatchBool = False
         except ValueError:
            print ("Please input a valid batchs input")
            TrainBatchBool = True

      while (TestBatchBool):
         testbatch = input(" Number of test batchs (Debug):   ")
         try:
            valtest = int(testbatch)
            print(f'\ntest batchs chosen: {valtest}')
            TestBatchBool = False
         except ValueError:
            print ("Please input a valid batchs input")
            TestBatchBool = True
      """
        #------------------------------------Job Menu---------------------------------------#
        job = jobclass(sessiontype, model, modeldict, optimizer, epochval,
                       device, valtrain, valtest, modelname)
        joblist.append(job)

        while (jobBool):
            finish = input(
                " Would you like to run another Model after? y/n:   ")
            if (finish != 'y' and finish != 'n'):
                print("Please enter y or n")
                jobBool = True
            if (finish == 'y'):
                jobBool = False
                print("Add another job")

            if (finish == 'n'):
                jobBool = False
                done = True
                print("Jobs Executing")
    return joblist
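
The returned joblist can then be drained by a runner. A sketch of what that might look like, assuming jobclass simply stores the menu fields as attributes (run_training and run_testing are hypothetical helpers):

def runJobs(joblist):
    for job in joblist:
        if job.sessiontype == 'a':   # 'a' was the "train a new model" choice
            run_training(job)        # hypothetical helper
        else:
            run_testing(job)         # hypothetical helper

runJobs(jobSetup())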
Example #4
# net = resnet50()
# net = resnet18()
# Rewrite the network's final layer
# fc_in_features = net.fc.in_features  # number of input features of the final layer
# net.fc = nn.Linear(in_features=fc_in_features, out_features=cfg.num_classes)

# Move the network and loss function to the GPU; configure the optimizer
net = net.to(cfg.device)
# net = nn.DataParallel(net, device_ids=[0, 1])
# criterion=nn.BCELoss()
#criterion = nn.BCEWithLogitsLoss().cuda(device=cfg.device)
criterion = nn.CrossEntropyLoss().cuda(device=cfg.device)
# Standard optimizers: SGD and Adam
#optimizer = optim.SGD(params=net.parameters(), lr=cfg.learning_rate,
#                      weight_decay=cfg.weight_decay, momentum=cfg.momentum)
optimizer = optim.Adam(params=net.parameters(), lr=cfg.learning_rate,
                       weight_decay=cfg.weight_decay)
# Optimizer for a linear learning-rate schedule
#optimizer = optim.SGD(params=net.parameters(), lr=cfg.learning_rate,
#                      weight_decay=cfg.weight_decay, momentum=cfg.momentum)

# -------------- Run training -----------------
# print('Starting training....')
# train_and_valid_(net, criterion=criterion,
#                  optimizer=optimizer,
#                  train_loader=train_loader,
#                  valid_loader=valid_loader, cfg=cfg,
#                  is_lr_warmup=False, is_lr_adjust=False)

# ------------- Run testing -----------------
print('Starting testing.....')
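
The snippet assumes a cfg object carrying the fields it reads. A minimal stand-in covering every attribute referenced above (values are illustrative):

from types import SimpleNamespace
import torch

cfg = SimpleNamespace(
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    num_classes=10,
    learning_rate=1e-3,
    weight_decay=1e-4,
    momentum=0.9,
)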
Example #5
def train(data_train, data_val, num_classes, num_epoch, milestones):
    model = AlexNet(num_classes, pretrain=False)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    lr_scheduler = MultiStepLR(optimizer, milestones=milestones, gamma=0.1)

    since = time.time()
    best_acc = 0
    best = 0
    for epoch in range(num_epoch):
        print('Epoch {}/{}'.format(epoch + 1, num_epoch))
        print('-' * 10)


        # Iterate over data.
        running_loss = 0.0
        running_corrects = 0
        model.train()
        with torch.set_grad_enabled(True):
            for i, (inputs, labels) in enumerate(data_train):
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                # accumulate per-batch accuracy; averaged over all batches below
                running_corrects += torch.sum(preds == labels.data) * 1. / inputs.size(0)
                print("\rIteration: {}/{}, Loss: {}.".format(i + 1, len(data_train), loss.item()), end="")

                sys.stdout.flush()

        avg_loss = running_loss / len(data_train)
        t_acc = running_corrects.double() / len(data_train)

        running_loss = 0.0
        running_corrects = 0
        model.eval()
        with torch.set_grad_enabled(False):
            for i, (inputs, labels) in enumerate(data_val):
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data) * 1. / inputs.size(0)

        val_loss = running_loss / len(data_val)
        val_acc = running_corrects.double() / len(data_val)

        print()
        print('Train Loss: {:.4f} Acc: {:.4f}'.format(avg_loss, t_acc))
        print('Val Loss: {:.4f} Acc: {:.4f}'.format(val_loss, val_acc))
        print('lr rate: {:.6f}'.format(optimizer.param_groups[0]['lr']))
        print()

        if val_acc > best_acc:
            best_acc = val_acc
            best = epoch + 1  # the best epoch is recorded, but its weights are not snapshotted

        lr_scheduler.step()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best Validation Accuracy: {}, Epoch: {}'.format(best_acc, best))

    return model
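
Note that data_train and data_val are DataLoaders, not Datasets: len(data_train) counts batches, which is what the loss and accuracy averages divide by. A minimal call sketch (train_set and val_set are assumed to exist):

from torch.utils.data import DataLoader

train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
val_loader = DataLoader(val_set, batch_size=64, shuffle=False)

# drop the learning rate by 10x at epochs 20 and 35, train for 50 epochs
model = train(train_loader, val_loader, num_classes=10, num_epoch=50, milestones=[20, 35])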
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--stage', default='train', type=str)
    parser.add_argument('--dataset', default='imagenet', type=str)
    parser.add_argument('--lr', default=0.0012, type=float)
    parser.add_argument('--batch_size', default=128, type=int)
    parser.add_argument('--gpus', default='0,1,2,3', type=str)
    parser.add_argument('--weight_decay', default=1e-5, type=float)
    parser.add_argument('--max_epoch', default=30, type=int)
    parser.add_argument('--lr_decay_steps', default='15,20,25', type=str)
    parser.add_argument('--exp', default='', type=str)
    parser.add_argument('--list', default='', type=str)
    parser.add_argument('--resume_path', default='', type=str)
    parser.add_argument('--pretrain_path', default='', type=str)
    parser.add_argument('--n_workers', default=32, type=int)

    parser.add_argument('--network', default='resnet50', type=str)

    global args
    args = parser.parse_args()

    for sub in ('', 'runs', 'models', 'logs'):
        os.makedirs(os.path.join(args.exp, sub), exist_ok=True)

    # logger initialize
    logger = getLogger(args.exp)

    device_ids = [int(x) for x in args.gpus.split(',')]
    device = torch.device('cuda:0')

    if args.dataset.startswith('cifar'):
        train_loader, val_loader = cifar.get_semi_dataloader(args)
    else:
        train_loader, val_loader = imagenet.get_semi_dataloader(args)

    # create model
    if args.network == 'alexnet':
        network = AlexNet(128)
    elif args.network == 'alexnet_cifar':
        network = AlexNet_cifar(128)
    elif args.network == 'resnet18_cifar':
        network = ResNet18_cifar()
    elif args.network == 'resnet50_cifar':
        network = ResNet50_cifar()
    elif args.network == 'wide_resnet28':
        network = WideResNet(28, 10 if args.dataset == 'cifar10' else 100, 2)
    elif args.network == 'resnet18':
        network = resnet18()
    elif args.network == 'resnet50':
        network = resnet50()
    else:
        raise ValueError('unknown network: {}'.format(args.network))
    network = nn.DataParallel(network, device_ids=device_ids)
    network.to(device)

    classifier = nn.Linear(2048, 1000).to(device)
    # create optimizer

    parameters = network.parameters()
    optimizer = torch.optim.SGD(
        parameters,
        lr=args.lr,
        momentum=0.9,
        weight_decay=args.weight_decay,
    )

    cls_optimizer = torch.optim.SGD(
        classifier.parameters(),
        lr=args.lr * 50,
        momentum=0.9,
        weight_decay=args.weight_decay,
    )

    cudnn.benchmark = True

    # create memory_bank
    global writer
    writer = SummaryWriter(comment='SemiSupervised',
                           logdir=os.path.join(args.exp, 'runs'))

    # create criterion
    criterion = nn.CrossEntropyLoss()

    logging.info(beautify(args))
    start_epoch = 0
    if args.pretrain_path != '' and args.pretrain_path != 'none':
        logging.info('loading pretrained file from {}'.format(
            args.pretrain_path))
        checkpoint = torch.load(args.pretrain_path)
        state_dict = checkpoint['state_dict']
        valid_state_dict = {
            k: v
            for k, v in state_dict.items()
            if k in network.state_dict() and 'fc.' not in k
        }
        for k, v in network.state_dict().items():
            if k not in valid_state_dict:
                logging.info('{}: Random Init'.format(k))
                valid_state_dict[k] = v
        # logging.info(valid_state_dict.keys())
        network.load_state_dict(valid_state_dict)
    else:
        logging.info('Training SemiSupervised Learning From Scratch')

    logging.info('start training')
    best_acc = 0.0
    try:
        for i_epoch in range(start_epoch, args.max_epoch):
            train(i_epoch, network, classifier, criterion, optimizer,
                  cls_optimizer, train_loader, device)

            checkpoint = {
                'epoch': i_epoch + 1,
                'state_dict': network.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(checkpoint,
                       os.path.join(args.exp, 'models', 'checkpoint.pth'))
            adjust_learning_rate(args.lr_decay_steps, optimizer, i_epoch)
            if i_epoch % 2 == 0:
                acc1, acc5 = validate(i_epoch, network, classifier, val_loader,
                                      device)
                if acc1 >= best_acc:
                    best_acc = acc1
                    torch.save(checkpoint,
                               os.path.join(args.exp, 'models', 'best.pth'))
                writer.add_scalar('acc1', acc1, i_epoch + 1)
                writer.add_scalar('acc5', acc5, i_epoch + 1)

            if i_epoch in [30, 60, 120, 160, 200]:
                torch.save(
                    checkpoint,
                    os.path.join(args.exp, 'models',
                                 '{}.pth'.format(i_epoch + 1)))

            # acc1/acc5 hold the results of the most recent validation pass
            logging.info(
                colorful('[Epoch: {}] val acc: {:.4f}/{:.4f}'.format(
                    i_epoch, acc1, acc5)))
            logging.info(
                colorful('[Epoch: {}] best acc: {:.4f}'.format(
                    i_epoch, best_acc)))

            with torch.no_grad():
                for name, param in network.named_parameters():
                    if 'bn' not in name:
                        writer.add_histogram(name, param, i_epoch)

            # cluster
    except KeyboardInterrupt:
        logging.info('KeyboardInterrupt at {} Epochs'.format(i_epoch))
        exit()
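
args.resume_path is parsed but never consumed in this excerpt. A resume step consistent with the checkpoint dict the loop saves could look like this (a sketch to be placed before the training loop, not the author's code):

if args.resume_path:
    ckpt = torch.load(args.resume_path, map_location=device)
    network.load_state_dict(ckpt['state_dict'])    # keys match the saved checkpoint
    optimizer.load_state_dict(ckpt['optimizer'])
    start_epoch = ckpt['epoch']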
Example #7
def main():
  # Init logger
  if not os.path.isdir(args.save_path):
    os.makedirs(args.save_path)
  log = open(os.path.join(args.save_path, 'log_seed_{}.txt'.format(args.manualSeed)), 'w')
  print_log('save path : {}'.format(args.save_path), log)
  state = {k: v for k, v in args._get_kwargs()}
  print_log(state, log)
  print_log("Random Seed: {}".format(args.manualSeed), log)
  print_log("python version : {}".format(sys.version.replace('\n', ' ')), log)
  print_log("torch  version : {}".format(torch.__version__), log)
  print_log("cudnn  version : {}".format(torch.backends.cudnn.version()), log)

  # Data loading code
  # Any other preprocessing? See http://pytorch.org/audio/transforms.html
  sample_length = 10000
  # Scale/PadTrim/DownmixMono come from the legacy torchaudio transforms API
  scale = transforms.Scale()
  padtrim = transforms.PadTrim(sample_length)
  downmix = transforms.DownmixMono()
  transforms_audio = transforms.Compose([
    scale, padtrim, downmix
  ])

  if not os.path.isdir(args.data_path):
    os.makedirs(args.data_path)
  train_dir = os.path.join(args.data_path, 'train')
  val_dir = os.path.join(args.data_path, 'val')

  #Choose dataset to use
  if args.dataset == 'arctic':
    # TODO No ImageFolder equivalent for audio. Need to create a Dataset manually
    train_dataset = Arctic(train_dir, transform=transforms_audio, download=True)
    val_dataset = Arctic(val_dir, transform=transforms_audio, download=True)
    num_classes = 4
  elif args.dataset == 'vctk':
    train_dataset = dset.VCTK(train_dir, transform=transforms_audio, download=True)
    val_dataset = dset.VCTK(val_dir, transform=transforms_audio, download=True)
    num_classes = 10
  elif args.dataset == 'yesno':
    train_dataset = dset.YESNO(train_dir, transform=transforms_audio, download=True)
    val_dataset = dset.YESNO(val_dir, transform=transforms_audio, download=True)
    num_classes = 2
  else:
    raise ValueError('unknown dataset: {}'.format(args.dataset))

  train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.workers,
    # pin_memory=True would page-lock host memory, speeding up CPU-to-GPU copies
    # sampler=None would leave the default (shuffled) sampling order in place
  )
  val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=args.batch_size, shuffle=False,
    num_workers=args.workers, pin_memory=True)


  # Feed the respective model file in to build the network (alexnet.py)
  print_log("=> creating model '{}'".format(args.arch), log)
  # Init model, criterion, and optimizer
  # net = models.__dict__[args.arch](num_classes)
  net = AlexNet(num_classes)
  print_log("=> network :\n {}".format(net), log)

  # net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

  # define loss function (criterion) and optimizer
  criterion = torch.nn.CrossEntropyLoss()

  # Stochastic gradient descent with Nesterov momentum as the optimizer
  optimizer = torch.optim.SGD(net.parameters(), state['learning_rate'], momentum=state['momentum'],
                weight_decay=state['decay'], nesterov=True)

  # Use the GPU if available
  if args.use_cuda:
    net.cuda()
    criterion.cuda()

  recorder = RecorderMeter(args.epochs)
  # optionally resume from a checkpoint
  # NOTE: the checkpoint must be loaded with the same Python version it was saved under
  if args.resume:
    if os.path.isfile(args.resume):
      print_log("=> loading checkpoint '{}'".format(args.resume), log)
      if args.ngpu == 0:
        checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage)
      else:
        checkpoint = torch.load(args.resume)

      recorder = checkpoint['recorder']
      args.start_epoch = checkpoint['epoch']
      net.load_state_dict(checkpoint['state_dict'])
      optimizer.load_state_dict(checkpoint['optimizer'])
      print_log("=> loaded checkpoint '{}' (epoch {})" .format(args.resume, checkpoint['epoch']), log)
    else:
      print_log("=> no checkpoint found at '{}'".format(args.resume), log)
  else:
    print_log("=> do not use any checkpoint for {} model".format(args.arch), log)

  if args.evaluate:
    validate(val_loader, net, criterion, 0, log, val_dataset)
    return

  # Main loop
  start_time = time.time()
  epoch_time = AverageMeter()

  # Training occurs here
  for epoch in range(args.start_epoch, args.epochs):
    current_learning_rate = adjust_learning_rate(optimizer, epoch, args.gammas, args.schedule)

    need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (args.epochs-epoch))
    need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)

    print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(time_string(), epoch, args.epochs, need_time, current_learning_rate) \
                + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 100-recorder.max_accuracy(False)), log)

    print("One epoch")
    # train for one epoch
    # Call to train (note that our previous net is passed into the model argument)
    train_acc, train_los = train(train_loader, net, criterion, optimizer, epoch, log, train_dataset)

    # evaluate on validation set
    #val_acc,   val_los   = extract_features(test_loader, net, criterion, log)
    val_acc,   val_los   = validate(val_loader, net, criterion, epoch, log, val_dataset)
    is_best = recorder.update(epoch, train_los, train_acc, val_los, val_acc)

    save_checkpoint({
      'epoch': epoch + 1,
      'arch': args.arch,
      'state_dict': net.state_dict(),
      'recorder': recorder,
      'optimizer' : optimizer.state_dict(),
    }, is_best, args.save_path, 'checkpoint.pth.tar')

    # measure elapsed time
    epoch_time.update(time.time() - start_time)
    start_time = time.time()
    recorder.plot_curve( os.path.join(args.save_path, 'curve.png') )

  log.close()
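
adjust_learning_rate is called but not shown. An implementation consistent with the (optimizer, epoch, gammas, schedule) call and the args defined above might look like this (a sketch, not necessarily the author's version):

def adjust_learning_rate(optimizer, epoch, gammas, schedule):
  # decay the base LR by each gamma whose milestone epoch has been reached
  lr = args.learning_rate
  for (gamma, step) in zip(gammas, schedule):
    if epoch >= step:
      lr = lr * gamma
  for param_group in optimizer.param_groups:
    param_group['lr'] = lr
  return lr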
Example #8
            P += (pred_cls == input_label_tensor).sum().cpu().detach().numpy()
            N += HyperParams["batch_size"]
        if idx % 500 == 499:
            print("|acc:%f|use time:%s|" %
                  (float(P / N), str(time.time() - start_time)))
            start_time = time.time()


if __name__ == '__main__':
    train_data = mnist.MNIST("./mnist_data")
    model = AlexNet(10)
    if HyperParams["cuda"]:
        model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.004)
    lr_sch = torch.optim.lr_scheduler.MultiStepLR(optimizer, [1, 2, 3, 4], 0.1)
    criterion = torch.nn.CrossEntropyLoss()
    static_params = torch.load("./%s_E%d.snap" %
                               (HyperParams["model_save_prefix"], 4))
    model.load_state_dict(static_params)
    # trainval(model, optimizer, lr_sch, criterion, train_data)
    if HyperParams["quantize"]:
        model = torch.quantization.quantize_dynamic(model)
    torch.save(model.state_dict(), "./quantize_mode.snap")
    test(model, train_data)
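
Calling quantize_dynamic with no qconfig_spec falls back to a default layer set that includes nn.Linear. For a model like this AlexNet that is equivalent to naming the layers explicitly, which reads more clearly (a sketch of the explicit form):

model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8)  # int8 dynamic quantization of Linear layers only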