Пример #1
0
def get_data():
    """Build the dataset splits described by the global ``settings`` module.

    Returns whatever ``data.get_dataset`` produces for the configured
    dataset directory, dataset name, temple ids, split, batch size and
    shuffle-buffer size.
    """
    # Resolve the configured dataset directory to an absolute path first.
    dataset_root = os.path.abspath(settings.DATASET_DIR)
    options = (
        settings.DATASET,
        settings.TEMPLES,
        settings.SPLIT,
        settings.BATCH_SIZE,
        settings.BUFFER_SIZE,
    )
    return data.get_dataset(dataset_root, *options)
Пример #2
0
    def load_data(self):
        """Load the graph dataset and cache its statistics on ``self.args``.

        Side effects: sets ``num_features``, ``num_classes`` and
        ``avg_num_nodes`` on ``self.args`` and prints a one-line summary.

        Returns:
            The loaded dataset object.
        """
        dataset = get_dataset(self.args.data, normalize=self.args.normalize)

        # Record dataset statistics for later consumers of `args`.
        self.args.num_features = dataset.num_features
        self.args.num_classes = dataset.num_classes
        # Average node count per graph, rounded up to an integer value.
        self.args.avg_num_nodes = np.ceil(
            np.mean([graph.num_nodes for graph in dataset]))

        print('# %s: [FEATURES]-%d [NUM_CLASSES]-%d [AVG_NODES]-%d' %
              (dataset, self.args.num_features, self.args.num_classes,
               self.args.avg_num_nodes))

        return dataset
Пример #3
0
def graft_block(args):
    """Block-wise grafting: train compressed student VGG blocks to replace
    teacher VGG16 blocks, then save the best per-block parameters.

    1x1 "adaption" convolutions translate activations between teacher and
    student channel widths in both directions. Each block is trained for
    ``args.num_epoch[block_id]`` epochs on a few-shot CIFAR subset, then the
    final-epoch block state is saved under ``ckpt/student/``.

    Args:
        args: parsed CLI namespace; reads ``dataset``, ``num_class``,
            ``ckpt``, ``data_path``, ``num_per_class``, ``batch_size``,
            ``lrs_s``, ``lrs_adapt_t2s``, ``lrs_adapt_s2t``, ``num_epoch``.

    Side effects: writes a log file under ``log/``, a checkpoint under
    ``ckpt/student/``, and rebinds the module-level ``logger``.
    """
    os.makedirs('log', exist_ok=True)
    # Module-level logger so it is visible to the rest of the module.
    global logger
    logger = Logger('log/graft_block_{}_{}_num_per_class_{}.txt'.\
                format(args.dataset, time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime()),
                       args.num_per_class))

    cfg_t = cfgs['vgg16']
    cfg_s = cfgs['vgg16-graft']

    cfg_blocks_t = split_block(cfg_t)
    cfg_blocks_s = split_block(cfg_s)

    num_block = len(block_graft_ids)
    # ---------------------- Adaption ----------------------
    # 1x1 convs mapping teacher channel widths to student widths, one per
    # graft point except the last (cfg[...][-2] is presumably the block's
    # output channel count — confirm against split_block).
    adaptions_t2s = [
        nn.Conv2d(cfg_blocks_t[block_graft_ids[i]][-2],
                  cfg_blocks_s[block_graft_ids[i]][-2],
                  kernel_size=1).cuda() for i in range(0, num_block - 1)
    ]

    for m in adaptions_t2s:
        init_conv(m)

    # Mirror set of 1x1 convs mapping student widths back to teacher widths.
    adaptions_s2t = [
        nn.Conv2d(cfg_blocks_s[block_graft_ids[i]][-2],
                  cfg_blocks_t[block_graft_ids[i]][-2],
                  kernel_size=1).cuda() for i in range(0, num_block - 1)
    ]

    for m in adaptions_s2t:
        init_conv(m)

    # ---------------------- Network ----------------------
    teacher = vgg_stock(cfg_t, args.dataset, args.num_class)
    student = vgg_bw(cfg_s, True, args.dataset, args.num_class)

    params_t = torch.load(args.ckpt)

    teacher.cuda().eval()
    teacher.load_state_dict(params_t)

    # Seed the student with the teacher's first feature layers
    # (keys 'features.0/1/2' only); everything else trains from scratch.
    params_s = {}
    for key in params_t.keys():
        key_split = key.split('.')
        if key_split[0] == 'features' and \
                key_split[1] in ['0', '1', '2']:
            params_s[key] = params_t[key]

    student.cuda().train()
    student.load_state_dict(params_s, strict=False)

    # Graftable student blocks; the classifier (with a Flatten in front)
    # acts as the final block.
    blocks_s = [student.features[i] for i in block_graft_ids[:-1]]
    blocks_s += [nn.Sequential(nn.Flatten().cuda(), student.classifier)]

    # ---------------------- Optimizer ----------------------
    # One Adam optimizer per student block / adaption conv, each with its
    # own learning rate taken from the CLI.
    optimizers_s = [
        optim.Adam(blocks_s[i].parameters(), lr=args.lrs_s[i])
        for i in range(0, num_block)
    ]

    optimizers_adapt_t2s = [
        optim.Adam(adaptions_t2s[i].parameters(), lr=args.lrs_adapt_t2s[i])
        for i in range(0, num_block - 1)
    ]

    optimizers_adapt_s2t = [
        optim.Adam(adaptions_s2t[i].parameters(), lr=args.lrs_adapt_s2t[i])
        for i in range(0, num_block - 1)
    ]
    # ---------------------- Datasets ----------------------
    # Few-shot training set: `num_per_class` samples per class, augmented.
    if args.dataset == 'CIFAR10':
        train_loader = DataLoader(CIFAR10Few(args.data_path,
                                             args.num_per_class,
                                             transform=get_transformer(
                                                 args.dataset,
                                                 cropsize=32,
                                                 crop_padding=4,
                                                 hflip=True)),
                                  batch_size=args.batch_size,
                                  num_workers=4,
                                  shuffle=True)
    elif args.dataset == 'CIFAR100':
        train_loader = DataLoader(CIFAR100Few(args.data_path,
                                              args.num_per_class,
                                              transform=get_transformer(
                                                  args.dataset,
                                                  cropsize=32,
                                                  crop_padding=4,
                                                  hflip=True)),
                                  batch_size=args.batch_size,
                                  num_workers=4,
                                  shuffle=True)

    test_loader = DataLoader(get_dataset(args, train_flag=False),
                             batch_size=256,
                             num_workers=4,
                             shuffle=False)

    # ---------------------- Training ----------------------
    os.makedirs('./ckpt/student', exist_ok=True)
    params_s_best = OrderedDict()

    for block_id in range(len(blocks_s)):
        # NOTE(review): 'best_accuarcy' is a typo for best_accuracy; it is
        # compared and updated below but never read after the epoch loop.
        best_accuarcy = 0.0
        for epoch in range(args.num_epoch[block_id]):
            if logger: logger.write('Epoch', epoch)
            # NOTE(review): `blocks_s_len` is not defined in this function;
            # presumably a module-level global — confirm.
            loss_value = train_epoch(
                args, teacher, blocks_s, blocks_s_len,
                [adaptions_t2s, adaptions_s2t], block_id, train_loader,
                [optimizers_s, optimizers_adapt_t2s, optimizers_adapt_s2t])

            # NOTE(review): accuracy is measured on `teacher`; presumably
            # train_epoch grafts the current student block into it — confirm.
            accuracy = test(teacher, test_loader)

            if best_accuarcy < accuracy:
                best_accuarcy = accuracy

            # Snapshot the block on the LAST epoch only (not the best epoch).
            if epoch == args.num_epoch[block_id] - 1:
                block_warp = warp_block(blocks_s, block_id, adaptions_t2s,
                                        adaptions_s2t)
                params_s_best['block-{}'.format(block_id)] \
                    = block_warp.cpu().state_dict().copy()  # deep copy !!!

            if logger:
                logger.write('Accuracy-B{}'.format(block_id), accuracy)

    # Re-graft every saved block into the teacher and report its accuracy.
    for block_id in range(len(blocks_s)):
        block = warp_block(blocks_s, block_id, adaptions_t2s, adaptions_s2t)
        block.load_state_dict(params_s_best['block-{}'.format(block_id)])
        block.cuda()
        teacher.set_scion(block, block_graft_ids[block_id], 1)
        accuracy = test(teacher, test_loader)
        if logger:
            logger.write('Test-Best-Accuracy-B{}'.format(block_id), accuracy)

    if logger:
        logger.close()
    # Persist all best block parameters in a single checkpoint file.
    with open('ckpt/student/vgg16-student-graft-block-{}-{}perclass.pth'.\
              format(args.dataset, args.num_per_class), 'bw') as f:
        torch.save(params_s_best, f)
Пример #4
0
def run():
    """Fine-tune a pre-trained grasping network and log/checkpoint results.

    Loads a full serialized model from a hard-coded checkpoint path, trains
    for ``args.epochs + 1`` epochs, validates each epoch, logs losses and
    IOU to tensorboard, and saves the network whenever IOU improves or on
    epoch 0 / every 10th epoch.

    NOTE(review): another ``run`` defined later in this file shadows this
    definition at import time — confirm which one is intended to be used.
    """
    args = parse_args()

    # Vis window
    if args.vis:
        cv2.namedWindow('Display', cv2.WINDOW_NORMAL)

    # Set-up output directories
    dt = datetime.datetime.now().strftime('%y%m%d_%H%M')
    net_desc = '{}_{}'.format(dt, '_'.join(args.description.split()))

    save_folder = os.path.join(args.outdir, net_desc)
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)
    tb = tensorboardX.SummaryWriter(os.path.join(args.logdir, net_desc))

    # Load Dataset
    logging.info('Loading {} Dataset...'.format(args.dataset.title()))
    Dataset = get_dataset(args.dataset)

    # Train split is [0, args.split), validation split is [args.split, 1.0);
    # only the train split gets random rotation/zoom augmentation enabled...
    # (random_rotate/random_zoom are True for val too — see below).
    train_dataset = Dataset(args.dataset_path,
                            start=0.0,
                            end=args.split,
                            ds_rotate=args.ds_rotate,
                            random_rotate=True,
                            random_zoom=True,
                            include_depth=args.use_depth,
                            include_rgb=args.use_rgb)
    train_data = torch.utils.data.DataLoader(train_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=args.num_workers)
    # NOTE(review): validation dataset also enables random_rotate/random_zoom,
    # so validation IOU is computed on augmented samples — confirm intended.
    val_dataset = Dataset(args.dataset_path,
                          start=args.split,
                          end=1.0,
                          ds_rotate=args.ds_rotate,
                          random_rotate=True,
                          random_zoom=True,
                          include_depth=args.use_depth,
                          include_rgb=args.use_rgb)
    val_data = torch.utils.data.DataLoader(val_dataset,
                                           batch_size=1,
                                           shuffle=False,
                                           num_workers=args.num_workers)
    logging.info('Done')

    # Load the network
    logging.info('Loading Network...')
    # One channel per depth input, three per RGB input.
    input_channels = 1 * args.use_depth + 3 * args.use_rgb
    ggcnn = get_network(args.network)

    # net = ggcnn(input_channels=input_channels)
    print(torch.cuda.is_available())
    print(torch.cuda.device_count())
    device = torch.device("cuda:0")

    # NOTE(review): resumes from a hard-coded checkpoint path instead of a
    # freshly constructed `ggcnn` (construction is commented out above) —
    # `args.network` is effectively ignored here.
    # net = torch.load("./ggcnn_weights_cornell/ggcnn_epoch_23_cornell",map_location=device)
    # net = torch.load("output/models2/cnn3/epoch_50_iou_0.49",map_location=device)
    net = torch.load("output/models2/211209_2216_/epoch_49_iou_0.22",
                     map_location=device)
    # net = net.to(device)
    optimizer = optim.Adam(net.parameters())
    logging.info('Done')

    # Print model architecture.
    summary(net, (input_channels, 300, 300))
    # Also capture the summary into arch.txt by temporarily redirecting stdout.
    f = open(os.path.join(save_folder, 'arch.txt'), 'w')
    sys.stdout = f
    summary(net, (input_channels, 300, 300))
    sys.stdout = sys.__stdout__
    f.close()
    # torch.load(os.path.join(save_folder,"epoch_10_iou_0.00_statedict.pt"))
    best_iou = 0.0
    # Runs args.epochs + 1 epochs in total (inclusive range).
    for epoch in range(args.epochs + 1):
        logging.info('Beginning Epoch {:02d}'.format(epoch))
        train_results = train(epoch,
                              net,
                              device,
                              train_data,
                              optimizer,
                              args.batches_per_epoch,
                              vis=args.vis)

        # Log training losses to tensorboard
        tb.add_scalar('loss/train_loss', train_results['loss'], epoch)
        for n, l in train_results['losses'].items():
            tb.add_scalar('train_loss/' + n, l, epoch)

        # Run Validation
        logging.info('Validating...')
        test_results = validate(net, device, val_data, args.val_batches)
        logging.info('%d/%d = %f' %
                     (test_results['correct'], test_results['correct'] +
                      test_results['failed'], test_results['correct'] /
                      (test_results['correct'] + test_results['failed'])))

        # Log validation results to tensorbaord
        tb.add_scalar(
            'loss/IOU', test_results['correct'] /
            (test_results['correct'] + test_results['failed']), epoch)
        tb.add_scalar('loss/val_loss', test_results['loss'], epoch)
        for n, l in test_results['losses'].items():
            tb.add_scalar('val_loss/' + n, l, epoch)

        # Save best performing network
        # Checkpoints are also written on epoch 0 and every 10th epoch.
        iou = test_results['correct'] / (test_results['correct'] +
                                         test_results['failed'])
        if iou > best_iou or epoch == 0 or (epoch % 10) == 0:
            torch.save(
                net,
                os.path.join(save_folder,
                             'epoch_%02d_iou_%0.2f' % (epoch, iou)))
            torch.save(
                net.state_dict(),
                os.path.join(
                    save_folder,
                    'epoch_%02d_iou_%0.2f_statedict.pt' % (epoch, iou)))
            if iou > best_iou:
                best_iou = iou
def run():
    """Train a grasping network from scratch with index-sampler data splits.

    Sets up file+console logging, splits a single dataset into train/val
    index subsets, builds the network from ``args.network``, trains for
    ``args.epochs`` epochs, and checkpoints on improved IOU or every 10th
    epoch.

    NOTE(review): this second ``run`` definition shadows the ``run`` defined
    earlier in this file — confirm which is intended.
    """
    args = parse_args()

    # Set-up output directories
    dt = datetime.datetime.now().strftime('%y%m%d_%H%M')
    net_desc = '{}_{}'.format(dt, '_'.join(args.description.split()))

    save_folder = os.path.join(args.logdir, net_desc)
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)
    tb = tensorboardX.SummaryWriter(save_folder)

    # Save commandline args
    if args is not None:
        params_path = os.path.join(save_folder, 'commandline_args.json')
        with open(params_path, 'w') as f:
            json.dump(vars(args), f)

    # Initialize logging
    # Replace any pre-existing handlers so basicConfig takes effect.
    logging.root.handlers = []
    logging.basicConfig(
        level=logging.INFO,
        filename="{0}/{1}.log".format(save_folder, 'log'),
        format=
        '[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s',
        datefmt='%H:%M:%S')
    # set up logging to console
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG)
    # set a format which is simpler for console use
    formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
    console.setFormatter(formatter)
    # add the handler to the root logger
    logging.getLogger('').addHandler(console)

    # Get the compute device
    device = get_device(args.force_cpu)

    # Load Dataset
    logging.info('Loading {} Dataset...'.format(args.dataset.title()))
    Dataset = get_dataset(args.dataset)
    dataset = Dataset(args.dataset_path,
                      ds_rotate=args.ds_rotate,
                      random_rotate=True,
                      random_zoom=True,
                      include_depth=args.use_depth,
                      include_rgb=args.use_rgb)
    logging.info('Dataset size is {}'.format(dataset.length))

    # Creating data indices for training and validation splits
    indices = list(range(dataset.length))
    split = int(np.floor(args.split * dataset.length))
    if args.ds_shuffle:
        # Seeded shuffle so the split is reproducible across runs.
        np.random.seed(args.random_seed)
        np.random.shuffle(indices)
    train_indices, val_indices = indices[:split], indices[split:]
    logging.info('Training size: {}'.format(len(train_indices)))
    logging.info('Validation size: {}'.format(len(val_indices)))

    # Creating data samplers and loaders
    # NOTE(review): SubsetRandomSampler is used for validation as well, so
    # validation order is random each epoch — confirm intended.
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_indices)
    val_sampler = torch.utils.data.sampler.SubsetRandomSampler(val_indices)

    train_data = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             num_workers=args.num_workers,
                                             sampler=train_sampler)
    val_data = torch.utils.data.DataLoader(dataset,
                                           batch_size=1,
                                           num_workers=args.num_workers,
                                           sampler=val_sampler)
    logging.info('Done')

    # Load the network
    logging.info('Loading Network...')
    # One channel per depth input, three per RGB input.
    input_channels = 1 * args.use_depth + 3 * args.use_rgb
    network = get_network(args.network)
    net = network(input_channels=input_channels,
                  dropout=args.use_dropout,
                  prob=args.dropout_prob,
                  channel_size=args.channel_size)

    net = net.to(device)
    logging.info('Done')

    if args.optim.lower() == 'adam':
        optimizer = optim.Adam(net.parameters())
    elif args.optim.lower() == 'sgd':
        optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    else:
        raise NotImplementedError('Optimizer {} is not implemented'.format(
            args.optim))

    # Print model architecture.
    summary(net, (input_channels, 224, 224))
    # Also capture the summary into arch.txt by temporarily redirecting stdout.
    f = open(os.path.join(save_folder, 'arch.txt'), 'w')
    sys.stdout = f
    summary(net, (input_channels, 224, 224))
    sys.stdout = sys.__stdout__
    f.close()

    best_iou = 0.0
    for epoch in range(args.epochs):
        logging.info('Beginning Epoch {:02d}'.format(epoch))
        train_results = train(epoch,
                              net,
                              device,
                              train_data,
                              optimizer,
                              args.batches_per_epoch,
                              vis=args.vis)

        # Log training losses to tensorboard
        tb.add_scalar('loss/train_loss', train_results['loss'], epoch)
        for n, l in train_results['losses'].items():
            tb.add_scalar('train_loss/' + n, l, epoch)

        # Run Validation
        logging.info('Validating...')
        test_results = validate(net, device, val_data)
        logging.info('%d/%d = %f' %
                     (test_results['correct'], test_results['correct'] +
                      test_results['failed'], test_results['correct'] /
                      (test_results['correct'] + test_results['failed'])))

        # Log validation results to tensorbaord
        tb.add_scalar(
            'loss/IOU', test_results['correct'] /
            (test_results['correct'] + test_results['failed']), epoch)
        tb.add_scalar('loss/val_loss', test_results['loss'], epoch)
        for n, l in test_results['losses'].items():
            tb.add_scalar('val_loss/' + n, l, epoch)

        # Save best performing network
        iou = test_results['correct'] / (test_results['correct'] +
                                         test_results['failed'])
        if iou > best_iou or epoch == 0 or (epoch % 10) == 0:
            torch.save(
                net,
                os.path.join(save_folder,
                             'epoch_%02d_iou_%0.2f' % (epoch, iou)))
            # NOTE(review): best_iou is overwritten unconditionally here, so
            # a periodic (epoch % 10) save with a LOWER iou resets the best
            # score downward — likely meant to be `if iou > best_iou`.
            best_iou = iou
Пример #6
0
def run(args, save_folder, log_folder):
    """Train a GG-CNN grasping network and checkpoint/log its progress.

    Args:
        args: parsed CLI namespace (dataset, network, split, augmentation,
            batch/worker counts, epochs, val_batches, vis).
        save_folder: directory for model checkpoints and arch.txt.
        log_folder: directory for the tensorboard event files.

    Side effects: writes tensorboard logs, arch.txt, and model checkpoints;
    trains on "cuda:0" unconditionally.
    """
    tb = tensorboardX.SummaryWriter(log_folder)

    # Load Dataset
    logging.info('Loading {} Dataset...'.format(args.dataset.title()))
    Dataset = get_dataset(args.dataset)

    # Train split is [0, args.split); validation split is [args.split, 1.0).
    # NOTE(review): random rotate/zoom augmentation is enabled for the
    # validation split too — confirm intended.
    train_dataset = Dataset(args.dataset_path, start=0.0, end=args.split, ds_rotate=args.ds_rotate,
                            random_rotate=True, random_zoom=True,
                            include_depth=args.use_depth, include_rgb=args.use_rgb)
    train_data = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers
    )
    val_dataset = Dataset(args.dataset_path, start=args.split, end=1.0, ds_rotate=args.ds_rotate,
                          random_rotate=True, random_zoom=True,
                          include_depth=args.use_depth, include_rgb=args.use_rgb)
    val_data = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=args.num_workers
    )
    logging.info('Done')

    # Load the network
    logging.info('Loading Network...')
    # One channel per depth input, three per RGB input.
    input_channels = 1*args.use_depth + 3*args.use_rgb
    ggcnn = get_network(args.network)

    net = ggcnn(input_channels=input_channels)
    device = torch.device("cuda:0")
    net = net.to(device)
    optimizer = optim.Adam(net.parameters())
    logging.info('Done')

    # Print model architecture.
    summary(net, (input_channels, 300, 300))
    # Also capture the summary into arch.txt by temporarily redirecting stdout.
    f = open(os.path.join(save_folder, 'arch.txt'), 'w')
    sys.stdout = f
    summary(net, (input_channels, 300, 300))
    sys.stdout = sys.__stdout__
    f.close()

    best_iou = 0.0
    for epoch in range(args.epochs):
        logging.info('Beginning Epoch {:02d}'.format(epoch))
        train_results = train(epoch, net, device, train_data, optimizer,
                              args.batches_per_epoch, vis=args.vis)

        # Log training losses to tensorboard
        tb.add_scalar('loss/train_loss', train_results['loss'], epoch)
        for n, l in train_results['losses'].items():
            tb.add_scalar('train_loss/' + n, l, epoch)

        # Run Validation
        logging.info('Validating...')
        test_results = validate(net, device, val_data, args.val_batches)
        logging.info('%d/%d = %f' % (test_results['correct'], test_results['correct'] + test_results['failed'],
                                     test_results['correct']/(test_results['correct']+test_results['failed'])))

        # Log validation results to tensorbaord
        tb.add_scalar('loss/IOU', test_results['correct'] /
                      (test_results['correct'] + test_results['failed']), epoch)
        tb.add_scalar('loss/val_loss', test_results['loss'], epoch)
        for n, l in test_results['losses'].items():
            tb.add_scalar('val_loss/' + n, l, epoch)

        # Save best performing network
        # Checkpoints are also written on epoch 0 and every 10th epoch.
        iou = test_results['correct'] / (test_results['correct'] + test_results['failed'])
        if iou > best_iou or epoch == 0 or (epoch % 10) == 0:
            torch.save(net, os.path.join(save_folder, 'epoch_%02d_iou_%0.2f' % (epoch, iou)))
            torch.save(net.state_dict(), os.path.join(
                save_folder, 'epoch_%02d_iou_%0.2f_statedict.pt' % (epoch, iou)))
            # NOTE(review): best_iou is overwritten unconditionally, so a
            # periodic (epoch % 10) save with a LOWER iou resets the best
            # score downward — likely meant to be guarded by iou > best_iou.
            best_iou = iou
Пример #7
0
# Build a timestamped log-directory path from the settings, then load the
# train/val dataset splits.
# NOTE(review): assigning to `time` here shadows any imported `time` module
# for the rest of this script — confirm that is acceptable.
time = datetime.now().strftime('%Y%m%d-%H%M%S')
temples = [str(x) for x in settings.TEMPLES]
temples = ''.join(temples)
resolution = f'{settings.IMG_WIDTH}x{settings.IMG_HEIGHT}'
# Path components use literal backslashes, i.e. Windows-style separators.
log_name = f'\\{settings.MODEL}\\{settings.DATASET}\\'
log_name += f'{settings.NORM_TYPE}_norm\\t{temples}-{resolution}-buffer{settings.BUFFER_SIZE}-' + \
            f'batch{settings.BATCH_SIZE}-e{settings.EPOCHS}\\{time}'
log_dir = os.path.abspath(settings.LOG_DIR) + log_name

# --- dataset ---
dataset_dir = os.path.abspath(settings.DATASET_DIR)

train, val = data.get_dataset(
    dataset_dir,
    settings.DATASET,
    settings.TEMPLES,
    settings.SPLIT,
    settings.BATCH_SIZE,
    settings.BUFFER_SIZE,
)

# for  testing purposes
# x = y = tf.random.normal((5, settings.IMG_HEIGHT, settings.IMG_WIDTH, 3))
# x = tf.data.Dataset.from_tensor_slices(x).batch(1)
# y = tf.data.Dataset.from_tensor_slices(y).batch(1)
# train = val = tf.data.Dataset.zip((x, y))

# --- model ---
model = builder.get_model(
    settings.MODEL,
    settings.DATASET,
    (settings.IMG_HEIGHT, settings.IMG_WIDTH, 3),
Пример #8
0
        raise ValueError(
            '--jacquard-output can not be used with data augmentation.')

    return args


if __name__ == '__main__':
    args = parse_args()

    # Load Network
    net = torch.load(args.network)
    device = torch.device("cuda:0")

    # Load Dataset
    logging.info('Loading {} Dataset...'.format(args.dataset.title()))
    Dataset = get_dataset(args.dataset)
    test_dataset = Dataset(args.dataset_path,
                           start=args.split,
                           end=1.0,
                           ds_rotate=args.ds_rotate,
                           random_rotate=args.augment,
                           random_zoom=args.augment,
                           include_depth=args.use_depth,
                           include_rgb=args.use_rgb)
    test_data = torch.utils.data.DataLoader(test_dataset,
                                            batch_size=1,
                                            shuffle=False,
                                            num_workers=args.num_workers)
    logging.info('Done')

    results = {'correct': 0, 'failed': 0}
Пример #9
0
def test_whole_net(args):
    """Reassemble the fully-grafted student network and report test accuracy.

    Rebuilds the teacher/student VGG configs and adaption convs, wraps each
    student block with ``warp_block``, loads the previously saved grafted-net
    checkpoint into the assembled ``nn.Sequential``, and evaluates it on the
    test set.

    Args:
        args: parsed CLI namespace; reads ``dataset``, ``num_class``,
            ``ckpt``, ``num_per_class`` and ``batch_size``.
    """
    cfg_t = cfgs['vgg16']
    cfg_s = cfgs['vgg16-graft']

    cfg_blocks_t = split_block(cfg_t)
    cfg_blocks_s = split_block(cfg_s)

    num_block = len(block_graft_ids)

    # ---------------------- Network ----------------------
    teacher = vgg_stock(cfg_t, args.dataset, args.num_class)

    params_t = torch.load(args.ckpt)

    teacher.cuda().eval()
    teacher.load_state_dict(params_t)

    # 1x1 adaption convs mapping teacher->student channel widths and back
    # (structure mirrors graft_block; weights come from the checkpoint below).
    adaptions_t2s = [
        nn.Conv2d(cfg_blocks_t[block_graft_ids[i]][-2],
                  cfg_blocks_s[block_graft_ids[i]][-2],
                  kernel_size=1).cuda() for i in range(0, num_block - 1)
    ]

    adaptions_s2t = [
        nn.Conv2d(cfg_blocks_s[block_graft_ids[i]][-2],
                  cfg_blocks_t[block_graft_ids[i]][-2],
                  kernel_size=1).cuda() for i in range(0, num_block - 1)
    ]

    # NOTE(review): cfg_s is re-assigned to the same value as above —
    # redundant.
    cfg_s = cfgs['vgg16-graft']
    student = vgg_bw(cfg_s, True, args.dataset, args.num_class)
    student.cuda()

    # Seed the student's first feature layers (features.0/1/2) from the
    # teacher checkpoint, as in graft_block.
    params_s = {}
    for key in params_t.keys():
        key_split = key.split('.')
        if key_split[0] == 'features' and \
                key_split[1] in ['0', '1', '2']:
            params_s[key] = params_t[key]

    student.load_state_dict(params_s, strict=False)

    # Graftable student blocks; the classifier is wrapped as the last block.
    blocks_s = [student.features[i] for i in block_graft_ids[:-1]]
    blocks_s += [nn.Sequential(nn.Flatten().cuda(), student.classifier)]

    blocks = []

    for block_id in range(num_block):
        blocks.append(
            warp_block(blocks_s, block_id, adaptions_t2s,
                       adaptions_s2t).cuda())

    # Load the trained grafted-net parameters into the assembled blocks.
    block = nn.Sequential(*blocks)
    block.load_state_dict(
        torch.load('ckpt/student/vgg16-student-graft-net-{}-{}perclass.pth'\
                          .format(args.dataset, args.num_per_class))
    )

    test_loader = DataLoader(get_dataset(args, train_flag=False),
                             batch_size=args.batch_size,
                             num_workers=4,
                             shuffle=False)

    # Prepend the (teacher-initialized) stem layers to form the full network.
    block = nn.Sequential(student.features[:3], block)

    print('Test Accuracy: ', test(block, test_loader))
Пример #10
0
def main_worker(args, ml_logger):
    """Train/evaluate a (optionally quantized) classification model.

    Builds the model (custom ResNet/Inception or a torchvision arch),
    optionally resumes from a checkpoint, optionally wraps layers in a
    ``ModelQuantizer`` (with batch-norm folding), optionally registers
    weight tensors for kurtosis regularization, then runs the train /
    validate loop, checkpointing on best top-1 accuracy.

    Args:
        args: parsed CLI namespace (arch, dataset, gpu_ids, resume,
            quantization and kurtosis options, optimizer hyperparameters).
        ml_logger: experiment logger with a ``log_metric`` method.

    Side effects: mutates the module-level ``best_acc1``, sets the CUDA
    device, writes checkpoints via ``save_checkpoint``.
    """
    global best_acc1
    datatime_str = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    suf_name = "_" + args.experiment

    if args.gpu_ids is not None:
        print("Use GPU: {} for training".format(args.gpu_ids))

    if args.log_stats:
        from utils.stats_trucker import StatsTrucker as ST
        # Presumably initializes a stats-tracking singleton keyed by the
        # bit-width configuration — confirm against StatsTrucker.
        ST("W{}A{}".format(args.bit_weights, args.bit_act))

    # ---- Model construction ----
    if 'resnet' in args.arch and args.custom_resnet:
        # pdb.set_trace()
        model = custom_resnet(arch=args.arch,
                              pretrained=args.pretrained,
                              depth=arch2depth(args.arch),
                              dataset=args.dataset)
    elif 'inception_v3' in args.arch and args.custom_inception:
        model = custom_inception(pretrained=args.pretrained)
    else:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=args.pretrained)

    # First GPU in the list hosts the model (and DataParallel output).
    device = torch.device('cuda:{}'.format(args.gpu_ids[0]))
    cudnn.benchmark = True

    torch.cuda.set_device(args.gpu_ids[0])
    model = model.to(device)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, device)
            args.start_epoch = checkpoint['epoch']
            # best_acc1 = checkpoint['best_acc1']
            # best_acc1 may be from a checkpoint from a different GPU
            # best_acc1 = best_acc1.to(device)
            # Strip DataParallel-style module prefixes from the state dict.
            checkpoint['state_dict'] = {
                normalize_module_name(k): v
                for k, v in checkpoint['state_dict'].items()
            }
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            # optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if len(args.gpu_ids) > 1:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features,
                                                   args.gpu_ids)
        else:
            model = torch.nn.DataParallel(model, args.gpu_ids)

    # ---- Data loading ----
    default_transform = {
        'train': get_transform(args.dataset, augment=True),
        'eval': get_transform(args.dataset, augment=False)
    }

    # NOTE(review): the validation loader uses shuffle=True — unusual for
    # evaluation; confirm intended.
    val_data = get_dataset(args.dataset, 'val', default_transform['eval'])
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(device)

    train_data = get_dataset(args.dataset, 'train', default_transform['train'])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               drop_last=True)

    # TODO: replace this call by initialization on small subset of training data
    # TODO: enable for activations
    # validate(val_loader, model, criterion, args, device)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    lr_scheduler = StepLR(optimizer, step_size=args.lr_step, gamma=0.1)

    # ---- Optional post-training quantization setup ----
    # pdb.set_trace()
    mq = None
    if args.quantize:
        if args.bn_folding:
            print(
                "Applying batch-norm folding ahead of post-training quantization"
            )
            from utils.absorb_bn import search_absorbe_bn
            search_absorbe_bn(model)

        all_convs = [
            n for n, m in model.named_modules() if isinstance(m, nn.Conv2d)
        ]
        # all_convs = [l for l in all_convs if 'downsample' not in l]
        all_relu = [
            n for n, m in model.named_modules() if isinstance(m, nn.ReLU)
        ]
        all_relu6 = [
            n for n, m in model.named_modules() if isinstance(m, nn.ReLU6)
        ]
        # Quantize all but the first/last ReLU(6) and all convs but the first.
        layers = all_relu[1:-1] + all_relu6[1:-1] + all_convs[1:]
        replacement_factory = {
            nn.ReLU: ActivationModuleWrapper,
            nn.ReLU6: ActivationModuleWrapper,
            nn.Conv2d: ParameterModuleWrapper
        }
        mq = ModelQuantizer(
            model, args, layers, replacement_factory,
            OptimizerBridge(optimizer,
                            settings={
                                'algo': 'SGD',
                                'dataset': args.dataset
                            }))

        if args.resume:
            # Load quantization parameters from state dict
            # NOTE(review): `checkpoint` is only bound above when the resume
            # file exists; a missing file reaches here as a NameError.
            mq.load_state_dict(checkpoint['state_dict'])

        mq.log_quantizer_state(ml_logger, -1)

        if args.model_freeze:
            mq.freeze()

    # pdb.set_trace()
    # Evaluation-only mode: validate once and return.
    if args.evaluate:
        acc = validate(val_loader, model, criterion, args, device)
        ml_logger.log_metric('Val Acc1', acc)
        return

    # evaluate on validation set
    acc1 = validate(val_loader, model, criterion, args, device)
    ml_logger.log_metric('Val Acc1', acc1, -1)

    # evaluate with k-means quantization
    # if args.model_freeze:
    # with mq.disable():
    #     acc1_nq = validate(val_loader, model, criterion, args, device)
    #     ml_logger.log_metric('Val Acc1 fp32', acc1_nq, -1)

    # pdb.set_trace()
    # Kurtosis regularization on weights tensors
    # Collect the weight tensors to be hooked for kurtosis regularization.
    weight_to_hook = {}
    if args.w_kurtosis:
        if args.weight_name[0] == 'all':
            all_convs = [
                n.replace(".wrapped_module", "") + '.weight'
                for n, m in model.named_modules() if isinstance(m, nn.Conv2d)
            ]
            weight_name = all_convs[1:]
            if args.remove_weight_name:
                for rm_name in args.remove_weight_name:
                    weight_name.remove(rm_name)
        else:
            weight_name = args.weight_name
        for name in weight_name:
            # pdb.set_trace()
            # ("fine" is presumably a typo for "find" in the helper's name.)
            curr_param = fine_weight_tensor_by_name(model, name)
            # if not curr_param:
            #     name = 'float_' + name # QAT name
            #     curr_param = fine_weight_tensor_by_name(self.model, name)
            # if curr_param is not None:
            weight_to_hook[name] = curr_param

    # ---- Main train / validate loop ----
    for epoch in range(0, args.epochs):
        # train for one epoch
        print('Timestamp Start epoch: {:%Y-%m-%d %H:%M:%S}'.format(
            datetime.datetime.now()))
        train(train_loader, model, criterion, optimizer, epoch, args, device,
              ml_logger, val_loader, mq, weight_to_hook)
        print('Timestamp End epoch: {:%Y-%m-%d %H:%M:%S}'.format(
            datetime.datetime.now()))

        if not args.lr_freeze:
            lr_scheduler.step()

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args, device)
        ml_logger.log_metric('Val Acc1', acc1, step='auto')

        # evaluate with k-means quantization
        # if args.model_freeze:
        # with mq.quantization_method('kmeans'):
        #     acc1_kmeans = validate(val_loader, model, criterion, args, device)
        #     ml_logger.log_metric('Val Acc1 kmeans', acc1_kmeans, epoch)

        # with mq.disable():
        #     acc1_nq = validate(val_loader, model, criterion, args, device)
        #     ml_logger.log_metric('Val Acc1 fp32', acc1_nq,  step='auto')

        if args.quantize:
            mq.log_quantizer_state(ml_logger, epoch)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # Unwrap DataParallel (`.module`) when saving multi-GPU state.
        save_checkpoint(
            {
                'epoch':
                epoch + 1,
                'arch':
                args.arch,
                'state_dict':
                model.state_dict()
                if len(args.gpu_ids) == 1 else model.module.state_dict(),
                'best_acc1':
                best_acc1,
                'optimizer':
                optimizer.state_dict(),
            },
            is_best,
            datatime_str=datatime_str,
            suf_name=suf_name)
Пример #11
0
    else:
        print('==> Load pretrained model form', args.pretrained, '...')
        pretrained_model = torch.load(args.pretrained)
        # best_acc = pretrained_model['best_acc']
        model.load_state_dict(pretrained_model['state_dict'])

    # Setup dataset - transformation, dataloader
    default_transform = {
        'train':
        get_transform(args.dataset, input_size=args.input_size, augment=True),
        'eval':
        get_transform(args.dataset, input_size=args.input_size, augment=False)
    }
    transform = getattr(model, 'input_transform', default_transform)

    test_data = get_dataset(args.dataset, 'val', transform['eval'])
    testloader = torch.utils.data.DataLoader(test_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    criterion = nn.CrossEntropyLoss()

    model.to(device)  #.half() # uncomment for FP16
    #model = torch.nn.DataParallel(model)

    [test_acc, test_loss] = test()
    print("Testing accuracy: ", test_acc)

    #sparsity_validate(model)
    if args.mvm:
        model = model_mvm

    model.to(device)  #.half() # uncomment for FP16
    model = torch.nn.DataParallel(model)

    # Setup dataset - transformation, dataloader
    default_transform = {
        'train':
        get_transform(args.dataset, input_size=args.input_size, augment=True),
        'eval':
        get_transform(args.dataset, input_size=args.input_size, augment=False)
    }
    transform = getattr(model, 'input_transform', default_transform)

    train_data = get_dataset(args.dataset, 'train', transform['train'])
    trainloader = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              worker_init_fn=_init_fn)

    test_data = get_dataset(args.dataset, 'val', transform['eval'])
    testloader = torch.utils.data.DataLoader(test_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True,
                                             worker_init_fn=_init_fn)
Пример #13
0
    def __init__(self, arch, use_custom_resnet, use_custom_inception,
                 pretrained, dataset, gpu_ids, datapath, batch_size, shuffle,
                 workers, print_freq, cal_batch_size, cal_set_size, args):
        """Build the model, optionally resume it from a checkpoint, and set
        up the validation and calibration data loaders.

        Args:
            arch: architecture name; looked up in ``models.__dict__`` unless
                a custom implementation is selected below.
            use_custom_resnet: use the project's custom ResNet when ``arch``
                contains 'resnet'.
            use_custom_inception: use the project's custom Inception when
                ``arch`` contains 'inception_v3' (used here, not stored).
            pretrained: load pretrained weights when creating the model.
            dataset: dataset name forwarded to ``get_dataset``/``get_transform``.
            gpu_ids: CUDA device ids; ``gpu_ids[0]`` is the primary device.
            datapath: root directory of the datasets.
            batch_size: batch size for the validation loader.
            shuffle: shuffle flag shared by both loaders.
            workers: number of DataLoader worker processes.
            print_freq: stored for later use; not read in this constructor.
            cal_batch_size: batch size for the calibration loader.
            cal_set_size: calibration set size (stored only).
            args: argparse namespace; ``resume`` and ``bn_folding`` are read.
        """
        self.arch = arch
        self.use_custom_resnet = use_custom_resnet
        self.pretrained = pretrained
        self.dataset = dataset
        self.gpu_ids = gpu_ids
        self.datapath = datapath
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.workers = workers
        self.print_freq = print_freq
        self.cal_batch_size = cal_batch_size
        self.cal_set_size = cal_set_size  # TODO: pass it as cmd line argument

        # create model: project-custom nets take precedence over torchvision
        if 'resnet' in arch and use_custom_resnet:
            model = custom_resnet(arch=arch,
                                  pretrained=pretrained,
                                  depth=arch2depth(arch),
                                  dataset=dataset)
        elif 'inception_v3' in arch and use_custom_inception:
            model = custom_inception(pretrained=pretrained)
        else:
            print("=> using pre-trained model '{}'".format(arch))
            model = models.__dict__[arch](pretrained=pretrained)

        self.device = torch.device('cuda:{}'.format(gpu_ids[0]))

        # Make gpu_ids[0] the current device before moving the model onto it.
        torch.cuda.set_device(gpu_ids[0])
        model = model.to(self.device)

        # optionally resume from a checkpoint
        if args.resume:
            if os.path.isfile(args.resume):
                print("=> loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(args.resume, self.device)
                args.start_epoch = checkpoint['epoch']
                # presumably normalizes DataParallel-style 'module.' key
                # prefixes so the checkpoint fits a plain model — confirm
                # against normalize_module_name's definition.
                checkpoint['state_dict'] = {
                    normalize_module_name(k): v
                    for k, v in checkpoint['state_dict'].items()
                }
                model.load_state_dict(checkpoint['state_dict'], strict=False)
                print("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(args.resume))

        if len(gpu_ids) > 1:
            # DataParallel will divide and allocate batch_size to all available GPUs
            if arch.startswith('alexnet') or arch.startswith('vgg'):
                model.features = torch.nn.DataParallel(model.features, gpu_ids)
            else:
                model = torch.nn.DataParallel(model, gpu_ids)

        self.model = model

        # BN folding mutates `model` in place, so the already-stored
        # self.model reference sees the folded network too.
        if args.bn_folding:
            print(
                "Applying batch-norm folding ahead of post-training quantization"
            )
            from utils.absorb_bn import search_absorbe_bn
            search_absorbe_bn(model)

        # define loss function (criterion) and optimizer
        self.criterion = torch.nn.CrossEntropyLoss().to(self.device)

        # Inception variants need 299x299 inputs; others use the transform's
        # defaults.
        val_data = get_dataset(
            dataset,
            'val',
            get_transform(dataset,
                          augment=False,
                          scale_size=299 if 'inception' in arch else None,
                          input_size=299 if 'inception' in arch else None),
            datasets_path=datapath)
        self.val_loader = torch.utils.data.DataLoader(val_data,
                                                      batch_size=batch_size,
                                                      shuffle=shuffle,
                                                      num_workers=workers,
                                                      pin_memory=True)

        # Calibration batches are drawn from the same validation dataset,
        # only with a different batch size.
        self.cal_loader = torch.utils.data.DataLoader(
            val_data,
            batch_size=self.cal_batch_size,
            shuffle=shuffle,
            num_workers=workers,
            pin_memory=True)
Пример #14
0
def main():
    """Train the DFCNN + CTC speech model with MindSpore.

    Every setting comes from the module-level ``config`` dict; a resumed
    run re-reads its architecture/optimizer hyper-parameters from the saved
    config so the checkpoint fits. Builds the dataset, network, optimizer
    and callbacks, then launches ``model.train``.
    """
    set_seed(1)  # fixed seed for reproducibility
    date = time.strftime("%Y%m%d%H%M%S", time.localtime())
    print(f'* Preparing to train model {date}')

    # ************** configuration ****************
    # - training setting
    resume = config['resume']
    if config['mode'] == 'PYNATIVE':
        mode = context.PYNATIVE_MODE
    else:
        mode = context.GRAPH_MODE

    device = config['device']
    device_id = config['device_id']
    dataset_sink_mode = config['dataset_sink_mode']

    # Forwarded to get_dataset() and StepAccInfo; presumably a length/stride
    # divisor for the audio features — TODO confirm in the dataset code.
    div = 8

    # Architecture settings (bias / dropout / padding). A resumed run
    # must re-use the values recorded in its saved config.
    if resume:
        print('* Resuming model...')
        resume_config_log = config['resume_config_log']
        resume_config = get_eval_config(resume_config_log)
        if 'best_ckpt' in resume_config.keys():
            resume_model_path = resume_config['best_ckpt']
        else:
            resume_model_path = resume_config['latest_model']
            print('* [WARNING] Not using the best model, but latest saved model instead.')

        has_bias = resume_config['has_bias']
        use_dropout = resume_config['use_dropout']

        pad_mode = resume_config['pad_mode']

        # explicit padding only applies in 'pad' mode; 'same' computes it
        if pad_mode == 'pad':
            padding = resume_config['padding']
        elif pad_mode == 'same':
            padding = 0
        else:
            raise ValueError(f"invalid pad mode: {pad_mode}!")

        best_acc = resume_config['best_acc']
        best_ckpt = resume_config['best_ckpt']
        print('* The best accuracy in dev dataset for the current resumed model is {:.2f}%'.format(best_acc * 100))

    else:
        has_bias = config['has_bias']
        use_dropout = config['use_dropout']

        pad_mode = config['pad_mode']

        if pad_mode == 'pad':
            padding = config['padding']
        elif pad_mode == 'same':
            padding = 0
        else:
            raise ValueError(f"invalid pad mode: {pad_mode}!")

    # hyper-parameters (again: resumed runs restore their own values)
    if resume:
        batch_size = resume_config['batch_size']
        opt_type = resume_config['opt']
        use_dynamic_lr = resume_config['use_dynamic_lr']
        warmup_step = resume_config['warmup_step']
        warmup_ratio = resume_config['warmup_ratio']
    else:
        batch_size = config['batch_size']
        opt_type = config['opt']
        use_dynamic_lr = config['use_dynamic_lr']
        warmup_step = config['warmup_step']
        warmup_ratio = config['warmup_ratio']

    test_dev_batch_size = config['test_dev_batch_size']
    learning_rate = float(config['learning_rate'])
    epochs = config['epochs']
    loss_scale = config['loss_scale']

    # configuration of saving model checkpoint
    save_checkpoint_steps = config['save_checkpoint_steps']
    keep_checkpoint_max = config['keep_checkpoint_max']
    prefix = config['prefix'] + '_' + date
    model_dir = config['model_dir']

    # loss monitor
    loss_monitor_step = config['loss_monitor_step']

    # whether to use mindInsight summary
    use_summary = config['use_summary']

    # step_eval
    use_step_eval = config['use_step_eval']
    eval_step = config['eval_step']
    eval_epoch = config['eval_epoch']
    patience = config['patience']

    # eval in steps or epochs; eval_step == -1 disables step-wise
    # evaluation (evaluate per epoch instead)
    step_eval = True

    if eval_step == -1:
        step_eval = False

    # ************** end of configuration **************
    if device == 'GPU':
        context.set_context(mode=mode, device_target=device, device_id=device_id)
    elif device == 'Ascend':
        # ModelArts/OBS setup: pull the dataset from OBS into the job.
        # NOTE(review): the last three copies go local -> OBS at startup
        # (the local dirs are likely still empty here) — confirm the
        # direction is intended.
        import moxing as mox
        from utils.const import DATA_PATH, MODEL_PATH, BEST_MODEL_PATH, LOG_PATH
        obs_datapath = config['obs_datapath']
        obs_saved_model = config['obs_saved_model']
        obs_best_model = config['obs_best_model']
        obs_log = config['obs_log']
        mox.file.copy_parallel(obs_datapath, DATA_PATH)
        mox.file.copy_parallel(MODEL_PATH, obs_saved_model)
        mox.file.copy_parallel(BEST_MODEL_PATH, obs_best_model)
        mox.file.copy_parallel(LOG_PATH, obs_log)
        context.set_context(mode=mode, device_target=device)
        use_summary = False  # mindInsight summary not used on Ascend jobs

    # callbacks function
    callbacks = []

    # data
    train_loader, idx2label, label2idx = get_dataset(batch_size=batch_size, phase='train',
                                                     test_dev_batch_size=test_dev_batch_size, div=div,
                                                     num_parallel_workers=4)

    # eval_step == 0 means: evaluate once per epoch (every dataset_size steps)
    if eval_step == 0:
        eval_step = train_loader.get_dataset_size()

    # network: output dimension is the label vocabulary size
    net = DFCNN(num_classes=len(label2idx), padding=padding, pad_mode=pad_mode,
                has_bias=has_bias, use_dropout=use_dropout)

    # Criterion
    criterion = CTCLoss()

    # resume: restore network weights from the chosen checkpoint
    if resume:
        print("* Loading parameters...")
        param_dict = load_checkpoint(resume_model_path)
        # load the parameter into net
        load_param_into_net(net, param_dict)
        print(f'* Parameters loading from {resume_model_path} succeeded!')

    net.set_train(True)
    net.set_grad(True)

    # lr schedule: precompute a per-step learning-rate tensor with warmup
    if use_dynamic_lr:
        dataset_size = train_loader.get_dataset_size()
        learning_rate = Tensor(dynamic_lr(base_lr=learning_rate, warmup_step=warmup_step,
                                          warmup_ratio=warmup_ratio, epochs=epochs,
                                          steps_per_epoch=dataset_size), mstype.float32)
        print('* Using dynamic learning rate, which will be set up as :', learning_rate.asnumpy())

    # optim
    if opt_type == 'adam':
        # NOTE(review): eps=10e-8 equals 1e-7, not the common Adam default
        # of 1e-8 — confirm this value is intentional.
        opt = nn.Adam(net.trainable_params(), learning_rate=learning_rate, beta1=0.9, beta2=0.999, weight_decay=0.0,
                      eps=10e-8)
    elif opt_type == 'rms':
        opt = nn.RMSProp(params=net.trainable_params(),
                         centered=True,
                         learning_rate=learning_rate,
                         momentum=0.9,
                         loss_scale=loss_scale)
    elif opt_type == 'sgd':
        opt = nn.SGD(params=net.trainable_params(), learning_rate=learning_rate)
    else:
        raise ValueError(f"optimizer: {opt_type} is not supported for now!")

    if resume:
        # load the parameter into optimizer (same checkpoint as the net)
        load_param_into_net(opt, param_dict)

    # save_model: periodic checkpointing callback
    config_ck = CheckpointConfig(save_checkpoint_steps=save_checkpoint_steps, keep_checkpoint_max=keep_checkpoint_max)
    ckpt_cb = ModelCheckpoint(prefix=prefix, directory=model_dir, config=config_ck)

    # logger
    the_logger = logger(config, date)
    log = Logging(logger=the_logger, model_ckpt=ckpt_cb)

    callbacks.append(ckpt_cb)
    callbacks.append(log)

    # Wrap net + loss, then add static loss scaling for mixed precision.
    net = WithLossCell(net, criterion)
    scaling_sens = Tensor(np.full((1), loss_scale), dtype=mstype.float32)

    net = DFCNNCTCTrainOneStepWithLossScaleCell(net, opt, scaling_sens)
    net.set_train(True)
    model = Model(net)

    if use_step_eval:
        # step evaluation: periodic accuracy checks with early stopping
        step_eval = StepAccInfo(model=model, name=prefix, div=div, test_dev_batch_size=test_dev_batch_size,
                                step_eval=step_eval, eval_step=eval_step, eval_epoch=eval_epoch,
                                logger=the_logger, patience=patience, dataset_size=train_loader.get_dataset_size())

        callbacks.append(step_eval)

    # loss monitor
    loss_monitor = LossMonitor(loss_monitor_step)

    callbacks.append(loss_monitor)

    if use_summary:
        summary_dir = os.path.join(SUMMARY_DIR, date)
        if not os.path.exists(summary_dir):
            os.mkdir(summary_dir)
        # mindInsight
        summary_collector = SummaryCollector(summary_dir=summary_dir, collect_freq=1, max_file_size=4 * 1024 ** 3)
        callbacks.append(summary_collector)

    if resume:
        # carry the resumed run's best accuracy/checkpoint into the logger
        the_logger.update_acc_ckpt(best_acc, best_ckpt)

    print(f'* Start training...')
    model.train(epochs,
                train_loader,
                callbacks=callbacks,
                dataset_sink_mode=dataset_sink_mode)
Пример #15
0
def graft_net(args):
    """Assemble the grafted student network and fine-tune it block by block.

    Loads the per-block student checkpoints produced by the block-grafting
    stage, then fine-tunes growing prefixes of the student (blocks 0..i+1)
    against the teacher on the few-shot training set, and finally saves the
    full grafted network.

    Args:
        args: namespace providing ``dataset``, ``data_path``,
            ``num_per_class``, ``batch_size``, ``num_class``, ``ckpt``
            and ``num_epoch``.

    Returns:
        float: best accuracy recorded for the final (longest) prefix.

    Raises:
        ValueError: if ``args.dataset`` is neither 'CIFAR10' nor 'CIFAR100'.
    """
    global logger_net
    logger_net = Logger('log/graft_net_{}_{}_{}perclass.txt'.\
                    format(args.dataset, time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime()),
                           args.num_per_class))
    # ---------------------- Datasets ----------------------
    if args.dataset == 'CIFAR10':
        train_loader = DataLoader(CIFAR10Few(args.data_path,
                                             args.num_per_class,
                                             transform=get_transformer(
                                                 args.dataset,
                                                 cropsize=32,
                                                 crop_padding=4,
                                                 hflip=True)),
                                  batch_size=args.batch_size,
                                  num_workers=4,
                                  shuffle=True)
    elif args.dataset == 'CIFAR100':
        train_loader = DataLoader(CIFAR100Few(args.data_path,
                                              args.num_per_class,
                                              transform=get_transformer(
                                                  args.dataset,
                                                  cropsize=32,
                                                  crop_padding=4,
                                                  hflip=True)),
                                  batch_size=args.batch_size,
                                  num_workers=4,
                                  shuffle=True)
    else:
        # Fail fast: previously an unsupported dataset fell through both
        # branches and crashed later with a NameError on train_loader.
        raise ValueError('unsupported dataset: {}'.format(args.dataset))

    test_loader = DataLoader(get_dataset(args, train_flag=False),
                             batch_size=args.batch_size,
                             num_workers=4,
                             shuffle=False)

    cfg_t = cfgs['vgg16']
    cfg_s = cfgs['vgg16-graft']

    cfg_blocks_t = split_block(cfg_t)
    cfg_blocks_s = split_block(cfg_s)

    num_block = len(block_graft_ids)
    # ---------------------- Adaption ----------------------
    # 1x1 convs translating channel counts between teacher and student
    # feature maps at each graft point (both directions).
    adaptions_t2s = [
        nn.Conv2d(cfg_blocks_t[block_graft_ids[i]][-2],
                  cfg_blocks_s[block_graft_ids[i]][-2],
                  kernel_size=1).cuda() for i in range(0, num_block - 1)
    ]

    adaptions_s2t = [
        nn.Conv2d(cfg_blocks_s[block_graft_ids[i]][-2],
                  cfg_blocks_t[block_graft_ids[i]][-2],
                  kernel_size=1).cuda() for i in range(0, num_block - 1)
    ]

    # ---------------------- Teacher ----------------------
    teacher = vgg_stock(cfg_t, args.dataset, args.num_class)

    params_t = torch.load(args.ckpt)

    teacher.cuda().eval()
    teacher.load_state_dict(params_t)

    # ---------------------- Blocks ----------------------
    # Seed the student's first three feature layers from the teacher.
    params_s = {}
    for key in params_t.keys():
        key_split = key.split('.')
        if key_split[0] == 'features' and \
                key_split[1] in ['0', '1', '2']:
            params_s[key] = params_t[key]

    student = vgg_bw(cfg_s, True, args.dataset, args.num_class)
    student.cuda().train()
    student.load_state_dict(params_s, strict=False)

    blocks_s = [student.features[i] for i in block_graft_ids[:-1]]
    blocks_s += [nn.Sequential(nn.Flatten().cuda(), student.classifier)]

    blocks = []

    for block_id in range(num_block):
        blocks.append(
            warp_block(blocks_s, block_id, adaptions_t2s,
                       adaptions_s2t).cuda())

    # Restore the per-block weights trained during the block-grafting stage.
    params = torch.load('ckpt/student/vgg16-student-graft-block-{}-{}perclass.pth'.\
                        format(args.dataset, args.num_per_class))

    for block_id in range(num_block):
        blocks[block_id].load_state_dict(params['block-{}'.format(block_id)])

    # Fine-tune progressively longer prefixes of the grafted network.
    for i in range(num_block - 1):
        block = nn.Sequential(*blocks[:(i + 2)])
        optimizer = optim.Adam(block.parameters(), lr=0.0001)

        scion_len = sum(blocks_s_len[:(i + 2)])

        accuracy_best_block = 0.0
        params_best_save = None

        for epoch in range(args.num_epoch[i]):
            if logger_net: logger_net.write('Epoch', epoch)
            train_epoch(args, teacher, block, scion_len,
                        train_loader, optimizer)

            # NOTE(review): this evaluates the *teacher*, so the logged
            # accuracy never reflects the block being trained — check
            # whether test(block, ...) (or a teacher/scion hybrid) was
            # intended here.
            accuracy = test(teacher, test_loader)

            if accuracy > accuracy_best_block:
                accuracy_best_block = accuracy
                # Snapshot the best weights on CPU, then move the block back.
                params_tmp = block.cpu().state_dict()
                params_best_save = params_tmp.copy()
                block.cuda()

            # At the very end of the last prefix, roll back to the best
            # weights seen so the saved network is the best one.
            if epoch == (args.num_epoch[i] - 1) and \
                i == (num_block - 2):
                block.load_state_dict(params_best_save)

            if logger_net:
                logger_net.write('Accuracy-length-{}'.format(scion_len),
                                 accuracy)

    if logger_net:
        logger_net.write('Student Best Accuracy', accuracy_best_block)

    with open('ckpt/student/vgg16-student-graft-net-{}-{}perclass.pth'\
                          .format(args.dataset, args.num_per_class), 'wb') as f:
        torch.save(block.state_dict(), f)
    if logger_net:
        logger_net.close()
    return accuracy_best_block
Пример #16
0
def run():
    """Train a grasp-prediction network and periodically checkpoint it.

    Builds train/val datasets from the parsed CLI arguments, trains for
    ``args.epochs`` epochs, logs losses to TensorBoard, and saves the
    network whenever validation loss improves (plus epoch 0 and every
    10th epoch).
    """
    from contextlib import redirect_stdout  # stdlib; used for the arch dump

    args = parse_args()

    # Set-up output directories
    dt = datetime.datetime.now().strftime('%y%m%d_%H%M')
    net_desc = '{}_{}'.format(dt, '_'.join(args.description.split()))

    save_folder = os.path.join(args.outdir, net_desc)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_folder, exist_ok=True)
    tb = tensorboardX.SummaryWriter(os.path.join(args.logdir, net_desc))

    # Load Dataset
    logging.info('Loading {} Dataset...'.format(args.dataset.title()))
    Dataset = get_dataset(args.dataset)

    # Train split: [0, split) with augmentation; val split: [split, 1).
    train_dataset = Dataset(args.dataset_path,
                            start=0.0,
                            end=args.split,
                            ds_rotate=args.ds_rotate,
                            random_rotate=True,
                            random_zoom=True,
                            include_depth=args.use_depth,
                            include_rgb=args.use_rgb)
    train_data = torch.utils.data.DataLoader(train_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=args.num_workers)
    val_dataset = Dataset(args.dataset_path,
                          start=args.split,
                          end=1.0,
                          ds_rotate=args.ds_rotate,
                          random_rotate=False,
                          random_zoom=False,
                          include_depth=args.use_depth,
                          include_rgb=args.use_rgb)
    val_data = torch.utils.data.DataLoader(val_dataset,
                                           batch_size=1,
                                           shuffle=False,
                                           num_workers=args.num_workers)
    logging.info('Done')

    # Load the network
    logging.info('Loading Network...')
    input_channels = 1 * args.use_depth + 3 * args.use_rgb
    ggcnn = get_network(args.network)

    net = ggcnn(input_channels=input_channels)
    device = torch.device("cpu")  # switch to torch.device("cuda:0") for GPU
    net = net.to(device)
    optimizer = optim.Adam(net.parameters())
    logging.info('Done')

    # Print model architecture to the console, then dump it to arch.txt.
    # redirect_stdout inside `with` guarantees stdout is restored and the
    # file is closed even if summary() raises (the old manual sys.stdout
    # swap leaked the handle and left stdout broken on error).
    summary(net, (input_channels, 200, 200))
    with open(os.path.join(save_folder, 'arch.txt'), 'w') as f, \
            redirect_stdout(f):
        summary(net, (input_channels, 200, 200))

    # Despite the name, 'iou' tracks validation loss below, so lower is
    # better and we start from a large sentinel.
    best_iou = 1000.0
    for epoch in range(args.epochs):
        logging.info('Beginning Epoch {:02d}'.format(epoch))
        train_results = train(epoch,
                              net,
                              device,
                              train_data,
                              optimizer,
                              args.batches_per_epoch,
                              vis=args.vis)

        # Log training losses to tensorboard
        tb.add_scalar('loss/train_loss', train_results['loss'], epoch)
        for n, l in train_results['losses'].items():
            tb.add_scalar('train_loss/' + n, l, epoch)

        # Run Validation
        logging.info('Validating...')
        test_results = validate(net, device, val_data, args.val_batches)

        # Log validation results to tensorboard
        tb.add_scalar('loss/val_loss', test_results['loss'], epoch)
        for n, l in test_results['losses'].items():
            tb.add_scalar('val_loss/' + n, l, epoch)

        # Save best performing network: validation loss stands in for IOU.
        iou = test_results['loss']
        # Save when this epoch beats the best so far; also always save the
        # first epoch and then every 10th.
        if iou < best_iou or epoch == 0 or (epoch % 10) == 0:
            torch.save(
                net,
                os.path.join(save_folder,
                             'epoch_%02d_iou_%0.2f' % (epoch, iou)))
            best_iou = iou
def main_worker(args, ml_logger):
    """Build the model, optionally quantize it, and run one validation pass.

    Args:
        args: argparse namespace (arch, pretrained flags, gpu_ids, resume,
            dataset/loader settings, quantize, ...).
        ml_logger: experiment logger with a ``log_metric`` method.
    """
    global best_acc1

    if args.gpu_ids is not None:
        print("Use GPU: {} for training".format(args.gpu_ids))

    # Model selection: project-custom nets first, then a pretrained
    # torchvision model, then a freshly-initialised torchvision model.
    if 'resnet' in args.arch and args.custom_resnet:
        model = custom_resnet(arch=args.arch,
                              pretrained=args.pretrained,
                              depth=arch2depth(args.arch),
                              dataset=args.dataset)
    elif 'inception_v3' in args.arch and args.custom_inception:
        model = custom_inception(pretrained=args.pretrained)
    elif args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    primary_gpu = args.gpu_ids[0]
    device = torch.device('cuda:{}'.format(primary_gpu))
    cudnn.benchmark = True

    # Make the primary GPU current before moving the model onto it.
    torch.cuda.set_device(primary_gpu)
    model = model.to(device)

    # Optionally resume weights (and the global best accuracy) from disk.
    if args.resume:
        if not os.path.isfile(args.resume):
            print("=> no checkpoint found at '{}'".format(args.resume))
        else:
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, device)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))

    if len(args.gpu_ids) > 1:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith(('alexnet', 'vgg')):
            model.features = torch.nn.DataParallel(model.features,
                                                   args.gpu_ids)
        else:
            model = torch.nn.DataParallel(model, args.gpu_ids)

    # Validation data: inception variants need 299x299 inputs.
    size_override = 299 if 'inception' in args.arch else None
    val_data = get_dataset(
        args.dataset,
        'val',
        get_transform(args.dataset,
                      augment=False,
                      scale_size=size_override,
                      input_size=size_override),
        datasets_path=args.datapath)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=args.shuffle,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(device)

    # Slice offsets into the per-type module lists below; the custom
    # inception skips more leading layers, and both skip the final layer.
    first = 3 if ('inception' in args.arch and args.custom_inception) else 1
    last = -1

    if args.quantize:
        def _named_modules_of(layer_type):
            # Names of all submodules of the given type, in model order.
            return [
                n for n, m in model.named_modules()
                if isinstance(m, layer_type)
            ]

        layers = (_named_modules_of(nn.ReLU)[first:last] +
                  _named_modules_of(nn.ReLU6)[first:last] +
                  _named_modules_of(nn.Conv2d)[first:last])
        replacement_factory = {
            nn.ReLU: ActivationModuleWrapperPost,
            nn.ReLU6: ActivationModuleWrapperPost,
            nn.Conv2d: ParameterModuleWrapperPost
        }
        mq = ModelQuantizer(model, args, layers, replacement_factory)
        mq.log_quantizer_state(ml_logger, -1)

    acc = validate(val_loader, model, criterion, args, device)
    ml_logger.log_metric('Val Acc1', acc, step='auto')
def main_worker(args, ml_logger):
    """Build, optionally quantize, and train/evaluate an image classifier.

    Workflow: construct the network (custom ResNet / custom Inception or a
    stock torchvision architecture), move it to the first GPU in
    ``args.gpu_ids``, optionally resume from a checkpoint, wrap with
    ``DataParallel`` for multi-GPU runs, build the train/val loaders,
    optionally wrap layers with quantization modules, then either run a
    single evaluation pass (``args.evaluate``) or the full training loop
    with per-epoch validation and checkpointing.

    Args:
        args: parsed command-line namespace (arch, dataset, gpu_ids,
            quantization flags, optimizer hyper-parameters, ...).
        ml_logger: experiment logger exposing ``log_metric``.
    """
    global best_acc1

    if args.gpu_ids is not None:
        print("Use GPU: {} for training".format(args.gpu_ids))

    if args.log_stats:
        # Local import keeps the tracker dependency optional; the bound
        # name `ST` stays visible for the whole function body.
        from utils.stats_trucker import StatsTrucker as ST
        ST("W{}A{}".format(args.bit_weights, args.bit_act))

    # ----- model construction -------------------------------------------
    if 'resnet' in args.arch and args.custom_resnet:
        model = custom_resnet(arch=args.arch,
                              pretrained=args.pretrained,
                              depth=arch2depth(args.arch),
                              dataset=args.dataset)
    elif 'inception_v3' in args.arch and args.custom_inception:
        model = custom_inception(pretrained=args.pretrained)
    else:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=args.pretrained)

    device = torch.device('cuda:{}'.format(args.gpu_ids[0]))
    cudnn.benchmark = True

    torch.cuda.set_device(args.gpu_ids[0])
    model = model.to(device)

    # ----- optionally resume from a checkpoint --------------------------
    # FIX: `checkpoint` was referenced later (quantizer state reload) even
    # when the file was missing, raising NameError. Track load success.
    checkpoint = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, device)
            args.start_epoch = checkpoint['epoch']
            # best_acc1 may come from a checkpoint written on a different
            # GPU, so it is intentionally not restored here.
            # Strip 'module.' prefixes left over from DataParallel saves.
            checkpoint['state_dict'] = {
                normalize_module_name(k): v
                for k, v in checkpoint['state_dict'].items()
            }
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if len(args.gpu_ids) > 1:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features,
                                                   args.gpu_ids)
        else:
            model = torch.nn.DataParallel(model, args.gpu_ids)

    # ----- data ----------------------------------------------------------
    default_transform = {
        'train': get_transform(args.dataset, augment=True),
        'eval': get_transform(args.dataset, augment=False)
    }

    val_data = get_dataset(args.dataset, 'val', default_transform['eval'])
    # NOTE(review): shuffle=True on a validation loader is unusual; overall
    # accuracy is unaffected, but per-batch logs are non-deterministic.
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(device)

    train_data = get_dataset(args.dataset, 'train', default_transform['train'])
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               drop_last=True)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # Step decay: lr *= 0.1 every args.lr_step epochs.
    lr_scheduler = StepLR(optimizer, step_size=args.lr_step, gamma=0.1)

    # ----- quantization ---------------------------------------------------
    mq = None
    all_convs = []
    if args.quantize:
        if args.bn_folding:
            print(
                "Applying batch-norm folding ahead of post-training quantization"
            )
            from utils.absorb_bn import search_absorbe_bn
            search_absorbe_bn(model)

        all_convs = [
            n for n, m in model.named_modules() if isinstance(m, nn.Conv2d)
        ]
        all_relu = [
            n for n, m in model.named_modules() if isinstance(m, nn.ReLU)
        ]
        all_relu6 = [
            n for n, m in model.named_modules() if isinstance(m, nn.ReLU6)
        ]
        # Skip the first/last activations and the first conv, which are
        # conventionally kept at full precision.
        layers = all_relu[1:-1] + all_relu6[1:-1] + all_convs[1:]
        replacement_factory = {
            nn.ReLU: ActivationModuleWrapper,
            nn.ReLU6: ActivationModuleWrapper,
            nn.Conv2d: ParameterModuleWrapper
        }
        mq = ModelQuantizer(
            model, args, layers, replacement_factory,
            OptimizerBridge(optimizer,
                            settings={
                                'algo': 'SGD',
                                'dataset': args.dataset
                            }))

        # FIX: only reload quantization parameters when the checkpoint was
        # actually loaded above (previously raised NameError on a bad path).
        if args.resume and checkpoint is not None:
            mq.load_state_dict(checkpoint['state_dict'])

        mq.log_quantizer_state(ml_logger, -1)

        if args.model_freeze:
            mq.freeze()

    # ----- one-shot evaluation --------------------------------------------
    if args.evaluate:
        if args.log_stats:
            if not all_convs:
                # FIX: all_convs was only built under args.quantize, so
                # --evaluate --log-stats without --quantize raised NameError.
                all_convs = [
                    n for n, m in model.named_modules()
                    if isinstance(m, nn.Conv2d)
                ]
            # Log the first four standardized moments of every conv weight
            # except the first layer's.
            mean = []
            var = []
            skew = []
            kurt = []
            for n, p in model.named_parameters():
                if n.replace('.weight', '') in all_convs[1:]:
                    mu = p.mean()
                    std = p.std()
                    mean.append((n, mu.item()))
                    var.append((n, (std**2).item()))
                    skew.append((n, torch.mean(((p - mu) / std)**3).item()))
                    kurt.append((n, torch.mean(((p - mu) / std)**4).item()))
            for i in range(len(mean)):
                ml_logger.log_metric(mean[i][0] + '.mean', mean[i][1])
                ml_logger.log_metric(var[i][0] + '.var', var[i][1])
                ml_logger.log_metric(skew[i][0] + '.skewness', skew[i][1])
                ml_logger.log_metric(kurt[i][0] + '.kurtosis', kurt[i][1])

            ml_logger.log_metric('weight_mean', np.mean([s[1] for s in mean]))
            ml_logger.log_metric('weight_var', np.mean([s[1] for s in var]))
            ml_logger.log_metric('weight_skewness',
                                 np.mean([s[1] for s in skew]))
            ml_logger.log_metric('weight_kurtosis',
                                 np.mean([s[1] for s in kurt]))

        acc = validate(val_loader, model, criterion, args, device)
        ml_logger.log_metric('Val Acc1', acc)
        if args.log_stats:
            stats = ST().get_stats()
            for s in stats:
                ml_logger.log_metric(s, np.mean(stats[s]))
        return

    # ----- training loop ----------------------------------------------------
    # Baseline validation before the first epoch (logged at step -1).
    acc1 = validate(val_loader, model, criterion, args, device)
    ml_logger.log_metric('Val Acc1', acc1, -1)

    for epoch in range(0, args.epochs):
        # train for one epoch
        print('Timestamp Start epoch: {:%Y-%m-%d %H:%M:%S}'.format(
            datetime.datetime.now()))
        train(train_loader, model, criterion, optimizer, epoch, args, device,
              ml_logger, val_loader, mq)
        print('Timestamp End epoch: {:%Y-%m-%d %H:%M:%S}'.format(
            datetime.datetime.now()))

        if not args.lr_freeze:
            lr_scheduler.step()

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args, device)
        ml_logger.log_metric('Val Acc1', acc1, step='auto')

        if args.quantize:
            mq.log_quantizer_state(ml_logger, epoch)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                # Unwrap DataParallel so the checkpoint loads on any GPU count.
                'state_dict': model.state_dict()
                if len(args.gpu_ids) == 1 else model.module.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
Пример #19
0
def test():
    """Evaluate a trained DFCNN speech model on the test split.

    Driven entirely by the module-level ``config`` dict: execution mode,
    target device, batch size, the training log to evaluate, and
    ``test_dataset_size`` (negative means "evaluate everything").
    Restores the best checkpoint (falling back to the latest one),
    decodes predictions with CTC, and reports accuracy derived from the
    per-sample edit distance, clamped to the reference length.
    """
    # Select the MindSpore execution mode.
    if config['mode'] == 'PYNATIVE':
        mode = context.PYNATIVE_MODE
    else:
        mode = context.GRAPH_MODE

    device = config['device']
    device_id = config['device_id']

    # On Ascend (ModelArts), data/model/log directories live in OBS and
    # must be synchronized with the local filesystem first.
    if device == 'Ascend':
        import moxing as mox
        from utils.const import DATA_PATH, MODEL_PATH, BEST_MODEL_PATH, LOG_PATH
        obs_datapath = config['obs_datapath']
        obs_saved_model = config['obs_saved_model']
        obs_best_model = config['obs_best_model']
        obs_log = config['obs_log']
        if not os.path.exists(MODEL_PATH):
            os.mkdir(MODEL_PATH)
        if not os.path.exists(BEST_MODEL_PATH):
            os.mkdir(BEST_MODEL_PATH)
        if not os.path.exists(LOG_PATH):
            os.mkdir(LOG_PATH)
        mox.file.copy_parallel(obs_datapath, DATA_PATH)
        mox.file.copy_parallel(MODEL_PATH, obs_saved_model)
        mox.file.copy_parallel(BEST_MODEL_PATH, obs_best_model)
        mox.file.copy_parallel(LOG_PATH, obs_log)

    test_dev_batch_size = config['test_dev_batch_size']

    eval_config_log = config['log_to_eval']
    # data_num < 0 means "evaluate the whole test set".
    data_num = config['test_dataset_size']

    eval_config = get_eval_config(eval_config_log)

    # Downsampling divisor used by the dataset pipeline.
    div = 8

    # Prefer the best checkpoint recorded by training; otherwise fall
    # back to the latest saved model.
    if 'best_ckpt' in eval_config.keys():
        eval_model_path = eval_config['best_ckpt']
        if device == 'Ascend':
            import moxing as mox
            from utils.const import BEST_MODEL_PATH
            eval_model_filename = eval_model_path.split('/')[-1]
            obs_best_model = config['obs_best_model']
            mox.file.copy_parallel(obs_best_model + eval_model_filename,
                                   eval_model_path)

    else:
        eval_model_path = eval_config['latest_model']
        if device == 'Ascend':
            import moxing as mox
            from utils.const import BEST_MODEL_PATH
            eval_model_filename = eval_model_path.split('/')[-1]
            obs_saved_model = config['obs_saved_model']
            mox.file.copy_parallel(obs_saved_model + eval_model_filename,
                                   eval_model_path)
        print(
            '* [WARNING] Not using the best model, but latest saved model instead.'
        )

    # Network hyper-parameters recorded by the training run (bias, dropout).
    has_bias = eval_config['has_bias']
    use_dropout = eval_config['use_dropout']

    # Padding configuration: explicit padding only valid with pad_mode='pad'.
    pad_mode = eval_config['pad_mode']

    if pad_mode == 'pad':
        padding = eval_config['padding']
    elif pad_mode == 'same':
        padding = 0
    else:
        raise ValueError(f"invalid pad mode: {pad_mode}!")

    if 'best_acc' in eval_config.keys():
        best_acc = eval_config['best_acc']
        print('* Best accuracy for the dev dataset is : {:.2f}%'.format(
            best_acc * 100))

    if device == 'GPU':
        context.set_context(mode=mode,
                            device_target=device,
                            device_id=device_id)
    elif device == 'Ascend':
        context.set_context(mode=mode, device_target=device)

    # data
    test_loader, idx2label, label2idx = get_dataset(
        phase='test',
        test_dev_batch_size=test_dev_batch_size,
        div=div,
        num_parallel_workers=4)

    net = DFCNN(num_classes=len(label2idx),
                padding=padding,
                pad_mode=pad_mode,
                has_bias=has_bias,
                use_dropout=use_dropout)

    # loads param
    param_dict = load_checkpoint(eval_model_path)
    load_param_into_net(net, param_dict)
    print('* params loaded!')

    net.set_train(False)

    converter = CTCLabelConverter(label2idx=label2idx,
                                  idx2label=idx2label,
                                  batch_size=test_dev_batch_size)

    words_num = 0       # total reference characters seen
    word_error_num = 0  # accumulated edit distance, clamped per sample

    limit = 0  # number of samples evaluated so far
    for data in test_loader.create_tuple_iterator():
        # FIX: was `limit > data_num`, which evaluated data_num + 1 samples.
        if data_num >= 0 and limit >= data_num:
            break
        img_batch, label_indices, label_batch, sequence_length, lab_len = data
        img_tensor = Tensor(img_batch, mstype.float32)
        model_predict = net(img_tensor)

        pred_str = converter.ctc_decoder(model_predict)
        label_str = converter.decode_label(label_batch, lab_len)

        for pred, lab in zip(pred_str, label_str):
            if data_num >= 0 and limit >= data_num:
                break
            words_n = len(lab)
            words_num += words_n

            # get edit distance, clamped to the reference length so a single
            # bad sample cannot contribute more errors than it has characters
            edit_distance = get_edit_distance(lab, pred)

            if edit_distance <= words_n:
                word_error_num += edit_distance
            else:
                word_error_num += words_n
            limit += 1

    if data_num > 0:
        size = str(data_num)
    else:
        size = 'all'
    # FIX: guard against ZeroDivisionError when no characters were evaluated
    # (empty loader or test_dataset_size == 0).
    if words_num == 0:
        print('* [Test result] No samples were evaluated.')
    else:
        print('* [Test result] For {} datas, the accuracy is: {:.2f}%'.format(
            size, ((1 - word_error_num / words_num) * 100)))