def main():
    if not torch.cuda.is_available():
        print('no gpu device available')
        sys.exit(1)

    writer = None
    num_gpus = torch.cuda.device_count()
    np.random.seed(args.seed)
    args.gpu = args.local_rank % num_gpus
    args.nprocs = num_gpus
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    cudnn.deterministic = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)

    if args.local_rank == 0:
        args.exp = datetime.datetime.now().strftime("%YY_%mM_%dD_%HH") + "_" + \
            "{:04d}".format(random.randint(0, 1000))

    print('gpu device = %d' % args.gpu)
    print("args = %s", args)
    model = resnet20()
    # model = mutableResNet20()
    # model = dynamic_resnet20()
    model = model.cuda(args.gpu)

    if num_gpus > 1:
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=False)

        args.world_size = torch.distributed.get_world_size()
        args.batch_size = args.batch_size // args.world_size

    # criterion_smooth = CrossEntropyLabelSmooth(args.classes, args.label_smooth)
    # criterion_smooth = criterion_smooth.cuda()
    criterion = torch.nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # scheduler = torch.optim.lr_scheduler.LambdaLR(
    #     optimizer, lambda step: (1.0-step/args.total_iters), last_epoch=-1)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    #     optimizer, T_0=5)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
    # scheduler = torch.optim.lr_scheduler.LambdaLR(
    #     optimizer, lambda epoch: 1 - (epoch / args.epochs))
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[100, 150],
                                                     last_epoch=-1)

    if args.local_rank == 0:
        writer = SummaryWriter(
            "./runs/%s-%05d" %
            (time.strftime("%m-%d", time.localtime()), random.randint(0, 100)))

    # Prepare data
    train_loader = get_train_loader(args.batch_size, args.local_rank,
                                    args.num_workers)
    # 原来跟train batch size一样,现在修改小一点 , 同时修改val_iters
    val_loader = get_val_loader(args.batch_size, args.num_workers)

    archloader = ArchLoader("data/Track1_final_archs.json")

    for epoch in range(args.epochs):
        train(train_loader, val_loader, optimizer, scheduler, model,
              archloader, criterion, args, val_iters, args.seed, epoch, writer)

        scheduler.step()
        if (epoch + 1) % args.report_freq == 0:
            top1_val, top5_val, objs_val = infer(train_loader, val_loader,
                                                 model, criterion, val_iters,
                                                 archloader, args)

            if args.local_rank == 0:
                # model
                if writer is not None:
                    writer.add_scalar("Val/loss", objs_val, epoch)
                    writer.add_scalar("Val/acc1", top1_val, epoch)
                    writer.add_scalar("Val/acc5", top5_val, epoch)

                save_checkpoint({
                    'state_dict': model.state_dict(),
                }, epoch, args.exp)
示例#2
0
def main():
    args = get_args()

    num_gpus = torch.cuda.device_count()
    np.random.seed(args.seed)
    args.gpu = args.local_rank % num_gpus
    torch.cuda.set_device(args.gpu)

    # cudnn.benchmark = True
    # cudnn.deterministic = True
    # torch.manual_seed(args.seed)
    # cudnn.enabled = True
    # torch.cuda.manual_seed(args.seed)

    if num_gpus > 1:
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.batch_size = args.batch_size // args.world_size

    # Log
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    fh = logging.FileHandler(
        os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000,
                                                  local_time.tm_mon, t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    # archLoader
    arch_loader = ArchLoader(args.path)
    arch_dataset = ArchDataSet(args.path)
    arch_sampler = None
    if num_gpus > 1:
        arch_sampler = DistributedSampler(arch_dataset)

    arch_dataloader = torch.utils.data.DataLoader(arch_dataset,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  num_workers=3,
                                                  pin_memory=True,
                                                  sampler=arch_sampler)

    val_dataset = get_val_dataset()
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    train_loader = get_train_loader(batch_size=args.batch_size,
                                    local_rank=0,
                                    num_workers=args.workers)

    print('load data successfully')

    model = dynamic_resnet20()

    print("load model successfully")

    print('load from latest checkpoint')
    lastest_model = args.weights
    if lastest_model is not None:
        checkpoint = torch.load(lastest_model,
                                map_location=None if True else 'cpu')
        model.load_state_dict(checkpoint['state_dict'])

    model = model.cuda(args.gpu)
    if num_gpus > 1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=False)

    # 参数设置
    args.val_dataloader = val_loader

    print("start to validate model...")

    validate(model, train_loader, args, arch_loader=arch_dataloader)
def main():

    if not torch.cuda.is_available():
        print('no gpu device available')
        sys.exit(1)

    writer = None
    num_gpus = torch.cuda.device_count()
    np.random.seed(args.seed)
    args.gpu = args.local_rank % num_gpus
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    cudnn.deterministic = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)

    print('gpu device = %d' % args.gpu)
    print("args = %s", args)

    torch.distributed.init_process_group(backend='nccl', init_method='env://')

    args.world_size = torch.distributed.get_world_size()
    args.batch_size = args.batch_size // args.world_size

    criterion_smooth = CrossEntropyLabelSmooth(args.classes, args.label_smooth)
    criterion_smooth = criterion_smooth.cuda()

    model = mutableResNet20()

    model = model.cuda(args.gpu)
    model = torch.nn.parallel.DistributedDataParallel(
        model,
        device_ids=[args.local_rank],
        output_device=args.local_rank,
        find_unused_parameters=True)

    # all_parameters = model.parameters()
    # weight_parameters = []
    # for pname, p in model.named_parameters():
    #     if p.ndimension() == 4 or 'classifier.0.weight' in pname or 'classifier.0.bias' in pname:
    #         weight_parameters.append(p)
    # weight_parameters_id = list(map(id, weight_parameters))
    # other_parameters = list(
    #     filter(lambda p: id(p) not in weight_parameters_id, all_parameters))
    # optimizer = torch.optim.SGD(
    #     [{'params': other_parameters},
    #      {'params': weight_parameters, 'weight_decay': args.weight_decay}],
    #     args.learning_rate,
    #     momentum=args.momentum,
    # )

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    args.total_iters = args.epochs * per_epoch_iters  # // 16  # 16 代表是每个子网的个数

    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lambda step: (1.0 - step / args.total_iters), last_epoch=-1)

    if args.local_rank == 0:
        writer = SummaryWriter(
            "./runs/%s-%05d" %
            (time.strftime("%m-%d", time.localtime()), random.randint(0, 100)))

    # Prepare data
    train_loader = get_train_loader(args.batch_size, args.local_rank,
                                    args.num_workers, args.total_iters)
    train_dataprovider = DataIterator(train_loader)
    val_loader = get_val_loader(args.batch_size, args.num_workers)
    val_dataprovider = DataIterator(val_loader)

    archloader = ArchLoader("data/Track1_final_archs.json")

    train(train_dataprovider, val_dataprovider, optimizer, scheduler, model,
          archloader, criterion_smooth, args, val_iters, args.seed, writer)