Example #1
def main():
    # os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    FLAGS.checkpoint_path = osp.join(FLAGS.checkpoint_path,
                                     str(datetime.date.today()))
    # check if checkpoint path exists
    if not os.path.exists(FLAGS.checkpoint_path):
        os.makedirs(FLAGS.checkpoint_path)
    else:
        shutil.rmtree(FLAGS.checkpoint_path)
        os.makedirs(FLAGS.checkpoint_path)

    train_dataset_size = 1125
    val_dataset_size = 471
    train_data_generator = data_processor.generator(
        'train_1125_13_train_15_train_8_8_64_no###.h5',
        dataset_size=train_dataset_size,
        batch_size=8)
    val_data_generator = data_processor.generator(
        'val_471_15_test_8_8_64_no###.h5',
        dataset_size=val_dataset_size,
        batch_size=8)

    east = EAST_model(FLAGS.input_size)
    model = east.model
    # model = multi_gpu_model(east.model, gpus=2)
    score_map_loss_weight = K.variable(0.01, name='score_map_loss_weight')
    csv_logger = CSVLogger(filename='icdar_2015_2013.csv',
                           separator=',',
                           append=True)
    checkpoint = ModelCheckpoint(
        filepath='icdar_2015_2013_{epoch:02d}_{loss:.4f}_{val_loss:.4f}.h5',
        monitor='val_loss',
        verbose=1,
        save_best_only=False,
        save_weights_only=False,
        mode='auto',
        period=1)
    terminate_on_nan = TerminateOnNaN()
    callbacks = [csv_logger, checkpoint, terminate_on_nan]

    opt = AdamW(FLAGS.init_learning_rate)
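    # Compile with two loss heads: dice loss on the score map and RBOX loss
    # on the geometry, weighted equally.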

    model.compile(loss=[
        dice_loss(east.text_region_boundary_mask, score_map_loss_weight),
        rbox_loss(east.target_score_map)
    ],
                  loss_weights=[1., 1.],
                  optimizer=opt)

    initial_epoch = 0
    history = model.fit_generator(
        train_data_generator,
        epochs=FLAGS.max_epochs,
        steps_per_epoch=train_dataset_size // FLAGS.batch_size,
        validation_data=val_data_generator,
        validation_steps=val_dataset_size // FLAGS.batch_size,
        callbacks=callbacks,
        initial_epoch=initial_epoch,
        verbose=1)
Example #2
def main(argv=None):
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    # check if checkpoint path exists
    if not os.path.exists(FLAGS.checkpoint_path):
        os.mkdir(FLAGS.checkpoint_path)
    else:
        #if not FLAGS.restore:
        #    shutil.rmtree(FLAGS.checkpoint_path)
        #    os.mkdir(FLAGS.checkpoint_path)
        shutil.rmtree(FLAGS.checkpoint_path)
        os.mkdir(FLAGS.checkpoint_path)

    train_data_generator = data_processor.generator(FLAGS)
    train_samples_count = data_processor.count_samples(FLAGS)

    val_data = data_processor.load_data(FLAGS)

    if len(gpus) <= 1:
        print('Training with 1 GPU')

        if FLAGS.drn:
            east = EAST_DRN_model(input_size=FLAGS.input_size)
        else:
            east = EAST_model(FLAGS.input_size)
            
        parallel_model = east.model
    else:
        print('Training with %d GPUs' % len(gpus))
        with tf.device("/cpu:0"):
            east = EAST_model(FLAGS.input_size)
        if FLAGS.restore_model != '':
            east.model.load_weights(FLAGS.restore_model)
        parallel_model = multi_gpu_model(east.model, gpus=len(gpus))

    score_map_loss_weight = K.variable(0.01, name='score_map_loss_weight')

    small_text_weight = K.variable(0., name='small_text_weight')

    lr_scheduler = LearningRateScheduler(lr_decay)
    ckpt = CustomModelCheckpoint(model=east.model,
                                 path=FLAGS.checkpoint_path + '/model-{epoch:02d}.h5',
                                 period=FLAGS.save_checkpoint_epochs,
                                 save_weights_only=True)
    tb = CustomTensorBoard(log_dir=FLAGS.checkpoint_path + '/train',
                           score_map_loss_weight=score_map_loss_weight,
                           small_text_weight=small_text_weight,
                           data_generator=train_data_generator,
                           write_graph=True)
    small_text_weight_callback = SmallTextWeight(small_text_weight)
    validation_evaluator = ValidationEvaluator(val_data,
                                               validation_log_dir=FLAGS.checkpoint_path + '/val')
    callbacks = [lr_scheduler, ckpt, tb, small_text_weight_callback, validation_evaluator]
    opt = AdamW(FLAGS.init_learning_rate)
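    # Compile the (possibly multi-GPU) model with two loss heads: dice loss on
    # the score map and RBOX loss on the geometry; the training masks and
    # small_text_weight enter the losses as extra inputs / Keras variables.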

    parallel_model.compile(loss=[dice_loss(east.overly_small_text_region_training_mask, east.text_region_boundary_training_mask, score_map_loss_weight, small_text_weight),
                                 rbox_loss(east.overly_small_text_region_training_mask, east.text_region_boundary_training_mask, small_text_weight, east.target_score_map)],
                           loss_weights=[1., 1.],
                           optimizer=opt)
    east.model.summary()

    model_json = east.model.to_json()
    with open(FLAGS.checkpoint_path + '/model.json', 'w') as json_file:
        json_file.write(model_json)

    history = parallel_model.fit_generator(train_data_generator,
                                           epochs=FLAGS.max_epochs,
                                           steps_per_epoch=train_samples_count // FLAGS.batch_size,
                                           workers=FLAGS.nb_workers,
                                           use_multiprocessing=True,
                                           callbacks=callbacks,
                                           verbose=1)
Example #3
def pretrain_generator(netG, module, param, batch_size):
    outel = list(netG._modules.values())[-1].weight.shape[0]
    dwidth = min(1024, outel)
    netD = make_netD(dwidth, batch_size)

    print(
        f"Layer size: {outel}, G params: {param_count(netG)}, D params: {param_count(netD)}"
    )
    optimG = AdamW(netG.parameters(), lr=5e-4, weight_decay=1e-4)
    optimD = AdamW(netD.parameters(), lr=5e-5, weight_decay=1e-4)

    i = 0
    d_adv_meter = AverageMeter()

    while True:
        netG.zero_grad()
        netD.zero_grad()

        z = fast_randn((batch_size, 256), requires_grad=True, device=device)
        q = netG(z)

        free_params([netD])
        freeze_params([netG])

        noise = codes_with_dropout(generate_noise(module, param, batch_size),
                                   dwidth)
        codes = codes_with_dropout(q, dwidth)
        d_real = netD(noise)
        d_fake = netD(codes)
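        # WGAN-GP critic objective: Wasserstein distance estimate plus a
        # gradient penalty computed on random interpolations between the
        # real noise codes and the generated codes.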

        interp = random_interpolate(noise, codes, device=device)
        gp = calc_gradient_penalty(netD, interp, device=device)
        d_adv = d_fake.mean() - d_real.mean()
        d_loss = d_adv + 10 * gp
        d_adv_meter.update(d_adv.item())
        d_loss.backward(retain_graph=True)

        optimD.step()
        freeze_params([netD])
        free_params([netG])

        d_fake_loss = -d_fake.mean()
        d_fake_loss.backward()

        optimG.step()
        if i % 50 == 0:
            print(d_adv_meter.avg, gp.item())
            if i > 2000 and d_adv_meter.avg > 0:
                break

            d_adv_meter.reset()
        i += 1
Example #4
def main():
    train_data_generator = DataGenerator(input_size=FLAGS.input_size, batch_size=FLAGS.batch_size,
                                         data_path=FLAGS.training_data_path, FLAGS=FLAGS, is_train=True)
    train_samples_count = len(train_data_generator.image_paths)
    validation_data_generator = DataGenerator(input_size=FLAGS.input_size, batch_size=FLAGS.batch_size,
                                              data_path=FLAGS.validation_data_path, FLAGS=FLAGS, is_train=False)

    east = EastModel(FLAGS.input_size)
    if FLAGS.pretrained_weights_path != '':
        print(f'Loading pre-trained model at {FLAGS.pretrained_weights_path}')
        east.model.load_weights(FLAGS.pretrained_weights_path)

    score_map_loss_weight = K.variable(0.01, name='score_map_loss_weight')
    small_text_weight = K.variable(0., name='small_text_weight')

    opt = AdamW(FLAGS.init_learning_rate)
    east.model.compile(
        loss=[
            dice_loss(east.overly_small_text_region_training_mask, east.text_region_boundary_training_mask,
                      score_map_loss_weight, small_text_weight),
            rbox_loss(east.overly_small_text_region_training_mask, east.text_region_boundary_training_mask,
                      small_text_weight, east.target_score_map)
        ],
        loss_weights=[1., 1.],
        optimizer=opt,
    )

    tb_callback = tensorboard_callback()
    cp_callback = checkpoint_callback()

    with open(os.path.join(FLAGS.checkpoint_path, 'model.json'), 'w') as json_file:
        json_file.write(east.model.to_json())
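    # DataGenerator is assumed to be a keras.utils.Sequence here, so
    # fit_generator can safely consume it with multiple workers and
    # multiprocessing enabled.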

    east.model.fit_generator(
        generator=train_data_generator,
        epochs=FLAGS.max_epochs,
        steps_per_epoch=train_samples_count // FLAGS.batch_size,
        validation_data=validation_data_generator,
        callbacks=[cp_callback, tb_callback],
        workers=FLAGS.nb_workers,
        use_multiprocessing=True,
        max_queue_size=10,
        verbose=1,
    )
Example #5
                            batch_size=1,
                            shuffle=False)

    test_set = RealDataset(opt.real_path, opt.channels, split='test')
    test_loader = DataLoader(dataset=test_set,
                             num_workers=0,
                             batch_size=1,
                             shuffle=False)

    opt.n_classes = train_set.n_classes
    net = PowderNet(opt.arch, opt.n_channels, train_set.n_classes)
    net = net.cuda()
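    # Two parameter groups: get_1x_lr_params at the base lr and
    # get_10x_lr_params at 10x the base lr, sharing the same weight decay.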
    optimizer = AdamW([{
        'params': get_1x_lr_params(net)
    }, {
        'params': get_10x_lr_params(net),
        'lr': opt.lr * 10
    }],
                      lr=opt.lr,
                      weight_decay=opt.decay)
    scheduler = CosineLRWithRestarts(optimizer, opt.batch_size, len(train_set),
                                     opt.period, opt.t_mult)
    vis = Visualizer(server=opt.server, env=opt.env)
    start_epoch = 0
    if opt.resume is not None:
        checkpoint = torch.load(opt.resume)
        old_opt = checkpoint['opt']
        assert (old_opt.channels == opt.channels)
        assert (old_opt.bands == opt.bands)
        assert (old_opt.arch == opt.arch)
        assert (old_opt.blend == opt.blend)
        assert (old_opt.lr == opt.lr)
Example #6
def main(config):
    seed_all()
    os.makedirs('cache', exist_ok=True)
    os.makedirs(config.logdir, exist_ok=True)
    print("Logging to: %s" % config.logdir)
    src_files = sorted(glob('*.py'))
    for src_fn in src_files:
        dst_fn = os.path.join(config.logdir, src_fn)
        copyfile(src_fn, dst_fn)

    train_image_fns = sorted(glob(os.path.join(config.train_dir, '*.jpg')))
    test_image_fns = sorted(glob(os.path.join(config.test_dir, '*.jpg')))

    assert len(train_image_fns) == 3881
    assert len(test_image_fns) == 4150

    gt, label_to_int = load_gt(config.train_rle)
    int_to_label = {v: k for k, v in label_to_int.items()}
    # create folds
    np.random.shuffle(train_image_fns)

    if config.subset > 0:
        train_image_fns = train_image_fns[:config.subset]

    folds = np.arange(len(train_image_fns)) % config.num_folds
    val_image_fns = [
        fn for k, fn in enumerate(train_image_fns) if folds[k] == config.fold
    ]
    train_image_fns = [
        fn for k, fn in enumerate(train_image_fns) if folds[k] != config.fold
    ]

    if config.add_val:
        print("Training on validation set")
        train_image_fns = train_image_fns + val_image_fns[:]

    print(len(val_image_fns), len(train_image_fns))

    # TODO: drop empty images <- is this helpful?
    train_image_fns = [
        fn for fn in train_image_fns if KuzushijiDataset.fn_to_id(fn) in gt
    ]
    val_image_fns = [
        fn for fn in val_image_fns if KuzushijiDataset.fn_to_id(fn) in gt
    ]

    print("VAL: ", len(val_image_fns), val_image_fns[123])
    print("TRAIN: ", len(train_image_fns), train_image_fns[456])

    train_ds = KuzushijiDataset(train_image_fns,
                                gt_boxes=gt,
                                label_to_int=label_to_int,
                                augment=True)
    val_ds = KuzushijiDataset(val_image_fns,
                              gt_boxes=gt,
                              label_to_int=label_to_int)

    if config.cache:
        train_ds.cache()
        val_ds.cache()

    val_loader = data.DataLoader(val_ds,
                                 batch_size=config.batch_size // 8,
                                 shuffle=False,
                                 num_workers=config.num_workers,
                                 pin_memory=config.pin,
                                 drop_last=False)

    model = FPNSegmentation(config.slug)
    if config.weight is not None:
        print("Loading: %s" % config.weight)
        model.load_state_dict(th.load(config.weight))
    model = model.to(config.device)
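    # Split parameters into a weight-decay group and a no-decay group;
    # biases, BatchNorm weights and the mean/std buffers are exempt from decay.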

    no_decay = ['mean', 'std', 'bias'] + ['.bn%d.' % i for i in range(100)]
    grouped_parameters = [{
        'params': [],
        'weight_decay': config.weight_decay
    }, {
        'params': [],
        'weight_decay': 0.0
    }]
    for n, p in model.named_parameters():
        if not any(nd in n for nd in no_decay):
            # print("Decay: %s" % n)
            grouped_parameters[0]['params'].append(p)
        else:
            # print("No Decay: %s" % n)
            grouped_parameters[1]['params'].append(p)
    optimizer = AdamW(grouped_parameters, lr=config.lr)

    if config.apex:
        model, optimizer = apex.amp.initialize(model,
                                               optimizer,
                                               opt_level="O1",
                                               verbosity=0)

    updates_per_epoch = len(train_ds) // config.batch_size
    num_updates = int(config.epochs * updates_per_epoch)
    scheduler = WarmupLinearSchedule(warmup=config.warmup, t_total=num_updates)
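    # The warmup-linear schedule is applied manually in the loop below by
    # scaling config.lr with scheduler.get_lr() and writing the result into
    # the optimizer's param groups.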

    # training loop
    smooth = 0.1
    best_acc = 0.0
    best_fn = None
    global_step = 0
    for epoch in range(1, config.epochs + 1):
        smooth_loss = None
        smooth_accuracy = None
        model.train()
        train_loader = data.DataLoader(train_ds,
                                       batch_size=config.batch_size,
                                       shuffle=True,
                                       num_workers=config.num_workers,
                                       pin_memory=config.pin,
                                       drop_last=True)
        progress = tqdm(total=len(train_ds), smoothing=0.01)
        if True:
            for i, (X, fns, hm, centers, classes) in enumerate(train_loader):
                X = X.to(config.device).float()
                hm = hm.to(config.device)
                centers = centers.to(config.device)
                classes = classes.to(config.device)
                hm_pred, classes_pred = model(X, centers=centers)
                loss = kuzushiji_loss(hm, centers, classes, hm_pred,
                                      classes_pred)
                if config.apex:
                    with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()

                lr_this_step = None
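                # Gradient accumulation: step the optimizer only every
                # accumulation_step batches, then refresh the learning rate
                # from the warmup-linear schedule.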
                if (i + 1) % config.accumulation_step == 0:
                    optimizer.step()
                    optimizer.zero_grad()
                    lr_this_step = config.lr * scheduler.get_lr(
                        global_step, config.warmup)
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr_this_step
                    global_step += 1

                smooth_loss = loss.item() if smooth_loss is None else \
                    smooth * loss.item() + (1. - smooth) * smooth_loss
                # print((y_true >= 0.5).sum().item())
                accuracy = th.mean(
                    ((th.sigmoid(hm_pred) >= 0.5) == (hm == 1)).to(
                        th.float)).item()
                smooth_accuracy = accuracy if smooth_accuracy is None else \
                    smooth * accuracy + (1. - smooth) * smooth_accuracy
                progress.set_postfix(
                    ep='%d/%d' % (epoch, config.epochs),
                    loss='%.4f' % smooth_loss,
                    accuracy='%.4f' % (smooth_accuracy),
                    lr='%.6f' %
                    (config.lr if lr_this_step is None else lr_this_step))
                progress.update(len(X))

        # skip validation
        if epoch not in [10, 20, 30, 40, 50]:
            if 1 < epoch <= 65:
                continue

        # validation loop
        model.eval()
        progress = tqdm(enumerate(val_loader), total=len(val_loader))
        hm_correct, classes_correct = 0, 0
        num_hm, num_classes = 0, 0
        with th.no_grad():
            for i, (X, fns, hm, centers, classes) in progress:
                X = X.to(config.device).float()
                hm = hm.cuda()
                centers = centers.cuda()
                classes = classes.cuda()
                hm_pred, classes_pred = model(X)
                hm_pred = th.sigmoid(hm_pred)
                classes_pred = th.nn.functional.softmax(classes_pred, 1)
                hm_cuda = hm.cuda()
                # PyTorch 1.2 has `bool`
                if hasattr(hm_cuda, 'bool'):
                    hm_cuda = hm_cuda.bool()
                hm_correct += (hm_cuda == (hm_pred >=
                                           0.5)).float().sum().item()
                num_hm += np.prod(hm.shape)
                num_samples = len(X)
                for sample_ind in range(num_samples):
                    center_mask = centers[sample_ind, :, 0] != -1
                    per_image_letters = center_mask.sum().item()
                    if per_image_letters == 0:
                        continue
                    num_classes += per_image_letters
                    centers_per_img = centers[sample_ind][center_mask]
                    classes_per_img = classes[sample_ind][center_mask]
                    classes_per_img_pred = classes_pred[
                        sample_ind][:, centers_per_img[:, 1],
                                    centers_per_img[:, 0]].argmax(0)
                    classes_correct += (
                        classes_per_img_pred == classes_per_img).sum().item()

        val_hm_acc = hm_correct / num_hm
        val_classes_acc = classes_correct / num_classes
        summary_str = 'f%02d-ep-%04d-val_hm_acc-%.4f-val_classes_acc-%.4f' % (
            config.fold, epoch, val_hm_acc, val_classes_acc)

        progress.write(summary_str)
        if val_classes_acc >= best_acc:
            weight_fn = os.path.join(config.logdir, summary_str + '.pth')
            progress.write("New best: %s" % weight_fn)
            th.save(model.state_dict(), weight_fn)
            best_acc = val_classes_acc
            best_fn = weight_fn
            fns = sorted(
                glob(os.path.join(config.logdir, 'f%02d-*.pth' % config.fold)))
            for fn in fns[:-config.n_keep]:
                os.remove(fn)

    # create submission
    test_ds = KuzushijiDataset(test_image_fns)
    test_loader = data.DataLoader(test_ds,
                                  batch_size=config.batch_size // 8,
                                  shuffle=False,
                                  num_workers=config.num_workers,
                                  pin_memory=False,
                                  drop_last=False)
    if best_fn is not None:
        model.load_state_dict(th.load(best_fn))
    model.eval()
    sub = create_submission(model,
                            test_loader,
                            int_to_label,
                            config,
                            pred_zip=config.pred_zip)
    sub.to_csv(config.submission_fn, index=False)
    print("Wrote to: %s" % config.submission_fn)

    # create val submission
    val_fn = config.submission_fn.replace('.csv', '_VAL.csv')
    model.eval()
    sub = create_submission(model,
                            val_loader,
                            int_to_label,
                            config,
                            pred_zip=config.pred_zip.replace(
                                '.zip', '_VAL.zip'))
    sub.to_csv(val_fn, index=False)
    print("Wrote to: %s" % val_fn)
Example #7
def main_worker(gpu, parallel, args, result_dir):
    if parallel:
        args.rank = args.rank + gpu
        torch.distributed.init_process_group(backend='nccl',
                                             init_method=args.dist_url,
                                             world_size=args.world_size,
                                             rank=args.rank)
    torch.backends.cudnn.benchmark = True
    random_seed(args.seed +
                args.rank)  # make data aug different for different processes
    torch.cuda.set_device(gpu)

    assert args.batch_size % args.world_size == 0
    from dataset import load_data, get_statistics, default_eps, input_dim
    train_loader, test_loader = load_data(args.dataset,
                                          'data/',
                                          args.batch_size // args.world_size,
                                          parallel,
                                          augmentation=True,
                                          classes=None)
    mean, std = get_statistics(args.dataset)
    num_classes = len(train_loader.dataset.classes)

    from model.bound_module import Predictor, BoundFinalIdentity
    from model.mlp import MLPFeature, MLP
    from model.conv import ConvFeature, Conv
    model_name, params = parse_function_call(args.model)
    if args.predictor_hidden_size > 0:
        model = locals()[model_name](input_dim=input_dim[args.dataset],
                                     **params)
        predictor = Predictor(model.out_features, args.predictor_hidden_size,
                              num_classes)
    else:
        model = locals()[model_name](input_dim=input_dim[args.dataset],
                                     num_classes=num_classes,
                                     **params)
        predictor = BoundFinalIdentity()
    model = Model(model, predictor, eps=0)
    model = model.cuda(gpu)
    if parallel:
        model = torch.nn.parallel.DistributedDataParallel(model,
                                                          device_ids=[gpu])

    loss_name, params = parse_function_call(args.loss)
    loss = Loss(globals()[loss_name](**params), args.kappa)

    output_flag = not parallel or gpu == 0
    if output_flag:
        logger = Logger(os.path.join(result_dir, 'log.txt'))
        for arg in vars(args):
            logger.print(arg, '=', getattr(args, arg))
        logger.print(train_loader.dataset.transform)
        logger.print(model)
        logger.print('number of params: ',
                     sum([p.numel() for p in model.parameters()]))
        logger.print('Using loss', loss)
        train_logger = TableLogger(os.path.join(result_dir, 'train.log'),
                                   ['epoch', 'loss', 'acc'])
        test_logger = TableLogger(os.path.join(result_dir, 'test.log'),
                                  ['epoch', 'loss', 'acc'])
    else:
        logger = train_logger = test_logger = None
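    # Note: this AdamW appears to be a project-specific variant that accepts
    # the model itself rather than model.parameters().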

    optimizer = AdamW(model,
                      lr=args.lr,
                      weight_decay=args.wd,
                      betas=(args.beta1, args.beta2),
                      eps=args.epsilon)

    if args.checkpoint:
        assert os.path.isfile(args.checkpoint)
        if parallel:
            torch.distributed.barrier()
        checkpoint = torch.load(
            args.checkpoint,
            map_location=lambda storage, loc: storage.cuda(gpu))
        state_dict = checkpoint['state_dict']
        if next(iter(state_dict))[0:7] == 'module.' and not parallel:
            new_state_dict = OrderedDict([(k[7:], v)
                                          for k, v in state_dict.items()])
            state_dict = new_state_dict
        elif next(iter(state_dict))[0:7] != 'module.' and parallel:
            new_state_dict = OrderedDict([('module.' + k, v)
                                          for k, v in state_dict.items()])
            state_dict = new_state_dict
        model.load_state_dict(state_dict)
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded '{}'".format(args.checkpoint))
        if parallel:
            torch.distributed.barrier()
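    # eps is specified in pixel scale; dividing by the per-channel std maps it
    # into the normalized input space, and up/down are the normalized bounds
    # of the valid [0, 1] pixel range used to clip PGD.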

    if args.eps_test is None:
        args.eps_test = default_eps[args.dataset]
    if args.eps_train is None:
        args.eps_train = args.eps_test
    args.eps_train /= std
    args.eps_test /= std
    up = torch.FloatTensor((1 - mean) / std).view(-1, 1, 1).cuda(gpu)
    down = torch.FloatTensor((0 - mean) / std).view(-1, 1, 1).cuda(gpu)
    attacker = AttackPGD(model,
                         args.eps_test,
                         step_size=args.eps_test / 4,
                         num_steps=20,
                         up=up,
                         down=down)
    args.epochs = [int(epoch) for epoch in args.epochs.split(',')]
    schedule = create_schedule(args, len(train_loader), model, loss, optimizer)

    if args.visualize and output_flag:
        from torch.utils.tensorboard import SummaryWriter
        writer = SummaryWriter(result_dir)
    else:
        writer = None

    for epoch in range(args.start_epoch, args.epochs[-1]):
        if parallel:
            train_loader.sampler.set_epoch(epoch)
        train_loss, train_acc = train(model, loss, epoch, train_loader,
                                      optimizer, schedule, logger,
                                      train_logger, gpu, parallel,
                                      args.print_freq)
        test_loss, test_acc = test(model, loss, epoch, test_loader, logger,
                                   test_logger, gpu, parallel, args.print_freq)
        if writer is not None:
            writer.add_scalar('curve/p', get_p_norm(model), epoch)
            writer.add_scalar('curve/train loss', train_loss, epoch)
            writer.add_scalar('curve/test loss', test_loss, epoch)
            writer.add_scalar('curve/train acc', train_acc, epoch)
            writer.add_scalar('curve/test acc', test_acc, epoch)
        if epoch % 50 == 49:
            if logger is not None:
                logger.print(
                    'Generate adversarial examples on training dataset and test dataset (fast, inaccurate)'
                )
            robust_train_acc = gen_adv_examples(model,
                                                attacker,
                                                train_loader,
                                                gpu,
                                                parallel,
                                                logger,
                                                fast=True)
            robust_test_acc = gen_adv_examples(model,
                                               attacker,
                                               test_loader,
                                               gpu,
                                               parallel,
                                               logger,
                                               fast=True)
            if writer is not None:
                writer.add_scalar('curve/robust train acc', robust_train_acc,
                                  epoch)
                writer.add_scalar('curve/robust test acc', robust_test_acc,
                                  epoch)
        if epoch % 5 == 4:
            certified_acc = certified_test(model, args.eps_test, up, down,
                                           epoch, test_loader, logger, gpu,
                                           parallel)
            if writer is not None:
                writer.add_scalar('curve/certified acc', certified_acc, epoch)
        if epoch > args.epochs[-1] - 3:
            if logger is not None:
                logger.print("Generate adversarial examples on test dataset")
            gen_adv_examples(model, attacker, test_loader, gpu, parallel,
                             logger)
            certified_test(model, args.eps_test, up, down, epoch, test_loader,
                           logger, gpu, parallel)

    schedule(args.epochs[-1], 0)
    if output_flag:
        logger.print(
            "Calculate certified accuracy on training dataset and test dataset"
        )
    certified_test(model, args.eps_test, up, down, args.epochs[-1],
                   train_loader, logger, gpu, parallel)
    certified_test(model, args.eps_test, up, down, args.epochs[-1],
                   test_loader, logger, gpu, parallel)

    if output_flag:
        torch.save(
            {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, os.path.join(result_dir, 'model.pth'))
    if writer is not None:
        writer.close()
Example #8
    Path(opt.out_path).mkdir(parents=True, exist_ok=True)

    train_set = HalfHalfDataset(opt.real_path, opt.syn_path, opt.params_path, opt.blend, opt.channels, opt.split)
    train_loader = DataLoader(dataset=train_set, num_workers=opt.threads, batch_size=opt.batch_size, shuffle=True, pin_memory=True)

    val_set = RealDataset(opt.real_path, opt.channels, split='val')
    val_loader = DataLoader(dataset=val_set, num_workers=0, batch_size=1, shuffle=False)

    test_set = RealDataset(opt.real_path, opt.channels, split='test')
    test_loader = DataLoader(dataset=test_set, num_workers=0, batch_size=1, shuffle=False)

    opt.n_classes = train_set.n_classes
    net = PowderNet(opt.arch, opt.n_channels, train_set.n_classes)
    net = net.cuda()
    optimizer = AdamW([{'params': get_1x_lr_params(net)}, {'params': get_10x_lr_params(net), 'lr': opt.lr * 10}], lr=opt.lr, weight_decay=opt.decay)
    scheduler = CosineLRWithRestarts(optimizer, opt.batch_size, len(train_set), opt.period, opt.t_mult)
    vis = Visualizer(server=opt.server, env=opt.env)
    start_epoch = 0
    if opt.resume is not None:
        checkpoint = torch.load(opt.resume)
        old_opt = checkpoint['opt']
        assert(old_opt.channels == opt.channels)
        assert(old_opt.bands == opt.bands)
        assert(old_opt.arch == opt.arch)
        assert(old_opt.blend == opt.blend)
        assert(old_opt.lr == opt.lr)
        assert(old_opt.decay == opt.decay)
        assert(old_opt.period == opt.period)
        assert(old_opt.t_mult == opt.t_mult)
        net.load_state_dict(checkpoint['state_dict'])
Example #9
def main():
    print(args)

    if not osp.exists(args.dir):
        os.makedirs(args.dir)

    if args.use_gpu:
        torch.cuda.set_device(args.gpu)
        cudnn.enabled = True
        cudnn.benchmark = True

    if args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    np.random.seed(args.manualSeed)

    labeled_size = args.label_num + args.val_num

    num_classes = 10
    data_dir = '../cifar10_data/'

    normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                     std=[0.2470, 0.2435, 0.2616])

    # transform is implemented inside zca dataloader
    dataloader = cifar.CIFAR10
    if args.auto:
        transform_train = transforms.Compose([
            transforms.RandomCrop(
                32, padding=4, fill=128
            ),  # fill parameter needs torchvision installed from source
            transforms.RandomHorizontalFlip(),
            CIFAR10Policy(),
            transforms.ToTensor(),
            Cutout(
                n_holes=1, length=16
            ),  # (https://github.com/uoguelph-mlrg/Cutout/blob/master/util/cutout.py)
            normalize
        ])
    else:
        transform_train = transforms.Compose([
            transforms.RandomCrop(
                32, padding=4, fill=128
            ),  # fill parameter needs torchvision installed from source
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ])

    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    base_dataset = datasets.CIFAR10(data_dir, train=True, download=True)
    train_labeled_idxs, train_unlabeled_idxs, val_idxs = train_val_split(
        base_dataset.targets, int(args.label_num / 10))

    labelset = CIFAR10_labeled(data_dir,
                               train_labeled_idxs,
                               train=True,
                               transform=transform_train)
    labelset2 = CIFAR10_labeled(data_dir,
                                train_labeled_idxs,
                                train=True,
                                transform=transform_test)
    unlabelset = CIFAR10_labeled(data_dir,
                                 train_unlabeled_idxs,
                                 train=True,
                                 transform=transform_train)
    unlabelset2 = CIFAR10_labeled(data_dir,
                                  train_unlabeled_idxs,
                                  train=True,
                                  transform=transform_test)
    validset = CIFAR10_labeled(data_dir,
                               val_idxs,
                               train=True,
                               transform=transform_test)
    testset = CIFAR10_labeled(data_dir, train=False, transform=transform_test)

    label_y = np.array(labelset.targets).astype(np.int32)
    unlabel_y = np.array(unlabelset.targets).astype(np.int32)
    unlabel_num = unlabel_y.shape[0]

    label_loader = torch.utils.data.DataLoader(labelset,
                                               batch_size=args.batch_size,
                                               num_workers=args.num_workers,
                                               pin_memory=True,
                                               drop_last=True)

    label_loader2 = torch.utils.data.DataLoader(
        labelset2,
        batch_size=args.eval_batch_size,
        num_workers=args.num_workers,
        pin_memory=True)

    unlabel_loader = torch.utils.data.DataLoader(
        unlabelset,
        batch_size=args.eval_batch_size,
        num_workers=args.num_workers,
        pin_memory=True)

    unlabel_loader2 = torch.utils.data.DataLoader(
        unlabelset2,
        batch_size=args.eval_batch_size,
        num_workers=args.num_workers,
        pin_memory=True)

    validloader = torch.utils.data.DataLoader(validset,
                                              batch_size=args.eval_batch_size,
                                              num_workers=args.num_workers,
                                              pin_memory=True)

    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.eval_batch_size,
                                             num_workers=args.num_workers,
                                             pin_memory=True)

    #initialize models
    model1 = create_model(args.num_classes, args.model)
    model2 = create_model(args.num_classes, args.model)
    ema_model = create_model(args.num_classes, args.model)

    if args.use_gpu:
        model1 = model1.cuda()
        model2 = model2.cuda()
        ema_model = ema_model.cuda()
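    # Detach the EMA model's parameters: they are never updated by backprop,
    # only through the WeightEMA updates below.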

    for param in ema_model.parameters():
        param.detach_()

    df = pd.DataFrame()
    stats_path = osp.join(args.dir, 'stats.txt')
    '''if prop > args.scale:
        prop = args.scale'''

    optimizer1 = AdamW(model1.parameters(), lr=args.lr)

    if args.init1 and osp.exists(args.init1):
        model1.load_state_dict(
            torch.load(args.init1, map_location='cuda:{}'.format(args.gpu)))

    ema_optimizer = WeightEMA(model1, ema_model, alpha=args.ema_decay)

    if args.init and osp.exists(args.init):
        model1.load_state_dict(
            torch.load(args.init, map_location='cuda:{}'.format(args.gpu)))

    _, best_acc = evaluate(validloader, ema_model, prefix='val')

    best_ema_path = osp.join(args.dir, 'best_ema.pth')
    best_model1_path = osp.join(args.dir, 'best_model1.pth')
    best_model2_path = osp.join(args.dir, 'best_model2.pth')
    init_path = osp.join(args.dir, 'init_ema.pth')
    init_path1 = osp.join(args.dir, 'init1.pth')
    init_path2 = osp.join(args.dir, 'init2.pth')
    torch.save(ema_model.state_dict(), init_path)
    torch.save(model1.state_dict(), init_path1)
    torch.save(model2.state_dict(), init_path2)
    torch.save(ema_model.state_dict(), best_ema_path)
    torch.save(model1.state_dict(), best_model1_path)
    skip_model2 = False
    end_iter = False

    confident_indices = np.array([], dtype=np.int64)
    all_indices = np.arange(unlabel_num).astype(np.int64)
    #no_help_indices = np.array([]).astype(np.int64)
    pseudo_labels = np.zeros(all_indices.shape, dtype=np.int32)

    steps_per_epoch = len(iter(label_loader))
    max_epoch = args.steps // steps_per_epoch

    logger = logging.getLogger('init')
    file_handler = logging.FileHandler(osp.join(args.dir, 'init.txt'))
    logger.addHandler(file_handler)
    logger.setLevel(logging.INFO)

    for epoch in range(max_epoch * 4 // 5):
        if args.mix:
            train_init_mix(label_loader,
                           model1,
                           optimizer1,
                           ema_optimizer,
                           steps_per_epoch,
                           epoch,
                           logger=logger)
        else:
            train_init(label_loader,
                       model1,
                       optimizer1,
                       ema_optimizer,
                       steps_per_epoch,
                       epoch,
                       logger=logger)

        if epoch % 10 == 0:
            val_loss, val_acc = evaluate(validloader, ema_model, logger,
                                         'valid')
            if val_acc >= best_acc:
                best_acc = val_acc
                evaluate(testloader, ema_model, logger, 'test')
                torch.save(ema_model.state_dict(), init_path)
                torch.save(model1.state_dict(), init_path1)

    adjust_learning_rate_adam(optimizer1, args.lr * 0.2)

    for epoch in range(max_epoch // 5):
        if args.mix:
            train_init_mix(label_loader,
                           model1,
                           optimizer1,
                           ema_optimizer,
                           steps_per_epoch,
                           epoch,
                           logger=logger)
        else:
            train_init(label_loader,
                       model1,
                       optimizer1,
                       ema_optimizer,
                       steps_per_epoch,
                       epoch,
                       logger=logger)

        if epoch % 10 == 0:
            val_loss, val_acc = evaluate(validloader, ema_model, logger,
                                         'valid')
            if val_acc >= best_acc:
                best_acc = val_acc
                evaluate(testloader, ema_model, logger, 'test')
                torch.save(ema_model.state_dict(), init_path)
                torch.save(model1.state_dict(), init_path1)

    ema_model.load_state_dict(torch.load(init_path))
    model1.load_state_dict(torch.load(init_path1))

    logger.info('init train finished')
    evaluate(validloader, ema_model, logger, 'valid')
    evaluate(testloader, ema_model, logger, 'test')
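    # Self-training rounds: pseudo-label the unlabeled set with the EMA model,
    # train model2 on labeled + pseudo-labeled data, then keep only the most
    # confident pseudo-labels per class and use them to refine model1.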

    for i_round in range(args.round):
        mask = np.zeros(all_indices.shape, dtype=bool)
        mask[confident_indices] = True
        other_indices = all_indices[~mask]

        optimizer2 = AdamW(model2.parameters(), lr=args.lr)

        logger = logging.getLogger('model2_round_{}'.format(i_round))
        file_handler = logging.FileHandler(
            osp.join(args.dir, 'model2_round_{}.txt'.format(i_round)))
        logger.addHandler(file_handler)
        logger.setLevel(logging.INFO)

        if args.auto:
            probs = predict_probs(ema_model, unlabel_loader2)
        else:
            probs = np.zeros((unlabel_num, args.num_classes))
            for i in range(args.K):
                probs += predict_probs(ema_model, unlabel_loader)
            probs /= args.K

        pseudo_labels[other_indices] = probs.argmax(axis=1).astype(
            np.int32)[other_indices]
        #pseudo_labels = probs.argmax(axis=1).astype(np.int32)

        df2 = create_basic_stats_dataframe()
        df2['iter'] = i_round
        df2['train_acc'] = accuracy_score(unlabel_y, pseudo_labels)
        df = df.append(df2, ignore_index=True)
        df.to_csv(stats_path, index=False)

        #phase2: train model2
        unlabelset.targets = pseudo_labels.copy()
        trainset = ConcatDataset([labelset, unlabelset])

        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=args.batch_size2,
                                                  num_workers=args.num_workers,
                                                  pin_memory=True,
                                                  shuffle=True)

        model2.load_state_dict(torch.load(init_path2))
        best_val_epoch = 0
        best_model2_acc = 0

        steps_per_epoch = len(iter(trainloader))
        max_epoch2 = args.steps2 // steps_per_epoch

        for epoch in range(max_epoch2):
            train_model2(trainloader, model2, optimizer2, epoch, logger=logger)

            val_loss, val_acc = evaluate(validloader, model2, logger, 'val')

            if val_acc >= best_model2_acc:
                best_model2_acc = val_acc
                best_val_epoch = epoch
                torch.save(model2.state_dict(), best_model2_path)
                evaluate(testloader, model2, logger, 'test')

            if (epoch - best_val_epoch) * steps_per_epoch > args.stop_steps2:
                break

        df.loc[df['iter'] == i_round, 'valid_acc'] = best_model2_acc
        df.loc[df['iter'] == i_round, 'valid_epoch'] = best_val_epoch
        df.to_csv(stats_path, index=False)

        model2.load_state_dict(torch.load(best_model2_path))
        logger.info('model2 train finished')

        evaluate(trainloader, model2, logger, 'train')

        evaluate(validloader, model2, logger, 'val')
        evaluate(label_loader2, model2, logger, 'reward')
        evaluate(testloader, model2, logger, 'test')
        #phase3: get confidence of unlabeled data by labeled data, split confident and unconfident data
        '''if args.auto:
            probs  = predict_probs(model2,unlabel_loader2)
        else:
            probs = np.zeros((unlabel_num,args.num_classes))
            for i in range(args.K):
                probs += predict_probs(model2, unlabel_loader)
            probs /= args.K'''

        probs = predict_probs(model2, unlabel_loader2)
        new_pseudo_labels = probs.argmax(axis=1)

        confidences = probs[all_indices, pseudo_labels]

        if args.schedule == 'exp':
            confident_num = int((len(confident_indices) + args.label_num) *
                                (1 + args.scale)) - args.label_num
        elif args.schedule == 'linear':
            confident_num = len(confident_indices) + int(
                unlabel_num * args.scale)

        old_confident_indices = confident_indices.copy()
        confident_indices = np.array([], dtype=np.int64)
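        # Class-balanced selection: for each class keep at most
        # confident_num // num_classes of the most confident pseudo-labels.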

        for j in range(args.num_classes):
            j_cands = (pseudo_labels == j)
            k_size = int(min(confident_num // args.num_classes, j_cands.sum()))
            logger.info('class: {}, confident size: {}'.format(j, k_size))
            if k_size > 0:
                j_idx_top = all_indices[j_cands][
                    confidences[j_cands].argsort()[-k_size:]]
                confident_indices = np.concatenate(
                    (confident_indices, all_indices[j_idx_top]))
        '''new_confident_indices = np.intersect1d(new_confident_indices, np.setdiff1d(new_confident_indices, no_help_indices))
        new_confident_indices = new_confident_indices[(-confidences[new_confident_indices]).argsort()]
        confident_indices = np.concatenate((old_confident_indices, new_confident_indices))'''

        acc = accuracy_score(unlabel_y[confident_indices],
                             pseudo_labels[confident_indices])
        logger.info('confident data num:{}, prop: {:4f}, acc: {:4f}'.format(
            len(confident_indices),
            len(confident_indices) / len(unlabel_y), acc))
        '''if len(old_confident_indices) > 0:
            acc = accuracy_score(unlabel_y[old_confident_indices],pseudo_labels[old_confident_indices])        
            logger.info('old confident data prop: {:4f}, acc: {:4f}'.format(len(old_confident_indices)/len(unlabel_y), acc))

        acc = accuracy_score(unlabel_y[new_confident_indices],pseudo_labels[new_confident_indices])
        logger.info('new confident data prop: {:4f}, acc: {:4f}'.format(len(new_confident_indices)/len(unlabel_y), acc))'''

        #unlabelset.train_labels_ul = pseudo_labels.copy()
        confident_dataset = torch.utils.data.Subset(unlabelset,
                                                    confident_indices)

        #phase4: refine model1 by confident data and reward data
        #train_dataset = torch.utils.data.ConcatDataset([confident_dataset,labelset])

        logger = logging.getLogger('model1_round_{}'.format(i_round))
        file_handler = logging.FileHandler(
            osp.join(args.dir, 'model1_round_{}.txt'.format(i_round)))
        logger.addHandler(file_handler)
        logger.setLevel(logging.INFO)

        best_val_epoch = 0
        evaluate(validloader, ema_model, logger, 'valid')
        evaluate(testloader, ema_model, logger, 'test')

        optimizer1 = AdamW(model1.parameters(), lr=args.lr)

        confident_dataset = torch.utils.data.Subset(unlabelset,
                                                    confident_indices)
        trainloader = torch.utils.data.DataLoader(confident_dataset,
                                                  batch_size=args.batch_size,
                                                  num_workers=args.num_workers,
                                                  shuffle=True,
                                                  drop_last=True)

        #steps_per_epoch = len(iter(trainloader))
        steps_per_epoch = 200
        max_epoch1 = args.steps1 // steps_per_epoch

        for epoch in range(max_epoch1):
            '''current_num = int(cal_consistency_weight( (epoch + 1) * steps_per_epoch, init_ep=0, end_ep=args.stop_steps1//2, init_w=start_num, end_w=end_num))            
            current_confident_indices = confident_indices[:current_num]
            logger.info('current num: {}'.format(current_num))'''
            if args.mix:
                train_model1_mix(label_loader,
                                 trainloader,
                                 model1,
                                 optimizer1,
                                 ema_model,
                                 ema_optimizer,
                                 steps_per_epoch,
                                 epoch,
                                 logger=logger)
            else:
                train_model1(label_loader,
                             trainloader,
                             model1,
                             optimizer1,
                             ema_model,
                             ema_optimizer,
                             steps_per_epoch,
                             epoch,
                             logger=logger)

            val_loss, val_acc = evaluate(validloader, ema_model, logger,
                                         'valid')
            if val_acc >= best_acc:
                best_acc = val_acc
                best_val_epoch = epoch
                evaluate(testloader, ema_model, logger, 'test')
                torch.save(model1.state_dict(), best_model1_path)
                torch.save(ema_model.state_dict(), best_ema_path)

            if (epoch - best_val_epoch) * steps_per_epoch > args.stop_steps1:
                break

        ema_model.load_state_dict(torch.load(best_ema_path))
        model1.load_state_dict(torch.load(best_model1_path))

        logger.info('model1 train finished')
        evaluate(validloader, ema_model, logger, 'valid')
        evaluate(testloader, ema_model, logger, 'test')
        '''no_help_indices = np.concatenate((no_help_indices,confident_indices[current_num:]))
        confident_indices = confident_indices[:current_num]'''

        if len(confident_indices) >= len(all_indices):
            break
Example #10
                                   batch_size=batch_size,
                                   num_workers=12,
                                   shuffle=True,
                                   pin_memory=True,
                                   drop_last=True)
    val_data_loader = DataLoader(val_train,
                                 batch_size=val_batch_size,
                                 num_workers=12,
                                 shuffle=False,
                                 pin_memory=False)

    model = SeResNext50_Unet_9ch(pretrained=None)  #.cuda()

    params = model.parameters()

    optimizer = AdamW(params, lr=0.0002, weight_decay=4e-6)

    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=[4, 8, 10],
                                         gamma=0.5)

    model = nn.DataParallel(model).cuda()

    snap_to_load = 'res50_9ch_{}_0_best'.format(seed)
    print("=> loading checkpoint '{}'".format(snap_to_load))
    checkpoint = torch.load(path.join(models_folder, snap_to_load),
                            map_location='cpu')
    loaded_dict = checkpoint['state_dict']
    sd = model.state_dict()
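    # Partial restore: copy only checkpoint tensors whose names and shapes
    # match the current model's state dict.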
    for k in model.state_dict():
        if k in loaded_dict and sd[k].size() == loaded_dict[k].size():
Example #11
    Path(opt.out_path).mkdir(parents=True, exist_ok=True)

    train_set = SyntheticDataset(opt.syn_path, opt.params_path, opt.blend, opt.channels)
    train_loader = DataLoader(dataset=train_set, num_workers=opt.threads, batch_size=opt.batch_size, shuffle=True, pin_memory=True)

    val_set = RealDataset(opt.real_path, opt.channels, split='val')
    val_loader = DataLoader(dataset=val_set, num_workers=0, batch_size=1, shuffle=False)

    test_set = RealDataset(opt.real_path, opt.channels, split='test')
    test_loader = DataLoader(dataset=test_set, num_workers=0, batch_size=1, shuffle=False)
    
    opt.n_classes = train_set.n_classes
    net = PowderNet(opt.arch, opt.n_channels, train_set.n_classes)
    net = net.cuda()
    optimizer = AdamW(net.parameters(), lr=opt.lr, weight_decay=opt.decay)
    scheduler = CosineLRWithRestarts(optimizer, opt.batch_size, len(train_set), opt.period, opt.t_mult)
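    # CosineLRWithRestarts presumably implements SGDR-style cosine annealing:
    # opt.period sets the initial restart cycle length and opt.t_mult scales
    # it after each restart.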
    vis = Visualizer(server=opt.server, env=opt.env)
    start_epoch = 0
    if opt.resume is not None:
        checkpoint = torch.load(opt.resume)
        old_opt = checkpoint['opt']
        assert(old_opt.channels == opt.channels)
        assert(old_opt.bands == opt.bands)
        assert(old_opt.arch == opt.arch)
        assert(old_opt.blend == opt.blend)
        assert(old_opt.lr == opt.lr)
        assert(old_opt.decay == opt.decay)
        assert(old_opt.period == opt.period)
        assert(old_opt.t_mult == opt.t_mult)
Example #12
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)
    np.random.seed(args.random_seed)
    random.seed(args.random_seed)
    torch.backends.cudnn.deterministic = True


    # import network
    # args.gen_model is TransGAN_8_8_1 for example
    gen_net = eval('models.'+args.gen_model+'.Generator')(args=args).cuda()
    dis_net = eval('models.'+args.dis_model+'.Discriminator')(args=args).cuda()
    gen_net.set_arch(args.arch, cur_stage=2)

    print("The shit!")

    # weight init: Xavier Uniform
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError('{} unknown init type'.format(args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)
    
    gen_net.apply(weights_init)
    dis_net.apply(weights_init)

    gpu_ids = [i for i in range(int(torch.cuda.device_count()))]
    gen_net = torch.nn.DataParallel(gen_net.to("cuda:0"), device_ids=gpu_ids)
    dis_net = torch.nn.DataParallel(dis_net.to("cuda:0"), device_ids=gpu_ids)
    
    

    # print(gen_net.module.cur_stage)

    if args.optimizer == "adam":
        gen_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, gen_net.parameters()),
                                        args.g_lr, (args.beta1, args.beta2))
        dis_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, dis_net.parameters()),
                                        args.d_lr, (args.beta1, args.beta2))
    elif args.optimizer == "adamw":
        gen_optimizer = AdamW(filter(lambda p: p.requires_grad, gen_net.parameters()),
                              args.g_lr, weight_decay=args.wd)
        dis_optimizer = AdamW(filter(lambda p: p.requires_grad, dis_net.parameters()),
                              args.d_lr, weight_decay=args.wd)
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0, args.max_iter * args.n_critic)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0, args.max_iter * args.n_critic)
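    # Both schedulers decay their learning rate linearly from the initial
    # value to 0 over max_iter * n_critic steps.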

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/stl10_train_unlabeled_fid_stats_48.npz'
    elif args.fid_stat is not None:
        fid_stat = args.fid_stat
    else:
        raise NotImplementedError('no fid stat for %s' % args.dataset.lower())
    assert os.path.exists(fid_stat)

    dataset = datasets.ImageDataset(args, cur_img_size=8)
    train_loader = dataset.train

    writer = SummaryWriter()
    writer_dict = {'writer': writer}
    writer_dict['train_global_steps'] = 0
    writer_dict['valid_global_steps'] = 0

    best = 1e4
    for epoch in range(args.max_epoch):

        train(args, gen_net = gen_net, dis_net = dis_net, gen_optimizer = gen_optimizer, dis_optimizer = dis_optimizer, gen_avg_param = None, train_loader = train_loader,
            epoch = epoch, writer_dict = writer_dict, fixed_z = None, schedulers=[gen_scheduler, dis_scheduler])

        checkpoint = {'epoch':epoch, 'best_fid':best}
        checkpoint['gen_state_dict'] = gen_net.state_dict()
        checkpoint['dis_state_dict'] = dis_net.state_dict()
        score = validate(args, None, fid_stat, epoch, gen_net, writer_dict, clean_dir=True)
        # report the latest FID against the best so far
        print(f'FID score: {score} - best FID score: {best} || @ epoch {epoch}.')
        if epoch == 0 or epoch > 30:
            if score < best:
                save_checkpoint(checkpoint, is_best=(score<best), output_dir=args.output_dir)
                print("Saved Latest Model!")
                best = score

    checkpoint = {'epoch':epoch, 'best_fid':best}
    checkpoint['gen_state_dict'] = gen_net.state_dict()
    checkpoint['dis_state_dict'] = dis_net.state_dict()
    score = validate(args, None, fid_stat, epoch, gen_net, writer_dict, clean_dir=True)
    save_checkpoint(checkpoint, is_best=(score<best), output_dir=args.output_dir)
Example #13
def train_gan(zq=256,
              ze=512,
              batch_size=32,
              outdir=".",
              name="tmp",
              dry=False,
              **kwargs):
    if not dry:
        tensorboard_path = Path(outdir) / 'tensorboard' / name
        model_path = Path(outdir) / 'models' / name
        tensorboard_path.mkdir(exist_ok=True, parents=True)
        model_path.mkdir(exist_ok=True, parents=True)

        sw = SummaryWriter(str(tensorboard_path))

    netT = resnet20().to(device)
    # netT = SimpleConvNet(bias=False).to(device)
    netH = HyperNet(netT, ze, zq).to(device)

    print("Loading pretrained generators...")
    pretrain = torch.load('pretrained.pt')
    netH.load_state_dict(pretrain['netH'])
    netD = SimpleLinearNet(
        [zq * batch_size, zq * batch_size // 2, zq * batch_size // 4, 1024, 1],
        final_sigmoid=True,
        batchnorm=False).to(device)

    print(netT, netH, netD)
    print(f"netT params: {param_count(netT)}")
    print(f"netH params: {param_count(netH)}")
    print(f"netD params: {param_count(netD)}")
    generator_count = param_layer_count(netT)

    optimH = AdamW(netH.parameters(),
                   lr=1e-4,
                   betas=(0.5, 0.9),
                   weight_decay=1e-4)
    optimD = AdamW(netD.parameters(),
                   lr=5e-5,
                   betas=(0.5, 0.9),
                   weight_decay=1e-4)

    g_loss_meter, d_loss_meter = AverageMeter(), AverageMeter()
    d_acc_meter = AverageMeter()
    gp_meter = AverageMeter()
    dgrad_meter = AverageMeter()

    adversarial_loss = nn.BCELoss()
    real_label, fake_label = 0, 1
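    # note: labels are inverted relative to the usual GAN convention
    # (real -> 0, fake -> 1); the accuracy meter below follows the same convention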
    label = torch.zeros((generator_count, 1), device=device)

    ops = 0
    start_time = time.time()
    minibatch_count = 1562
    for epoch in range(100000):
        d_loss_meter.reset()
        g_loss_meter.reset()
        d_acc_meter.reset()
        gp_meter.reset()
        dgrad_meter.reset()
        # schedH.step()
        # schedD.step()
        for batch_idx in range(minibatch_count):
            n_iter = epoch * minibatch_count + batch_idx

            netH.zero_grad()
            netD.zero_grad()
            z = fast_randn((batch_size, ze), device=device, requires_grad=True)
            q = netH.encoder(z).view(-1, generator_count, zq)

            # Z Adversary
            free_params([netD])
            freeze_params([netH])

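            # flatten the per-layer codes across the batch so the discriminator sees
            # one (zq * batch_size)-dimensional vector per generated target layer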
            codes = q.permute((1, 0, 2)).contiguous().view(generator_count, -1)
            noise = fast_randn((generator_count, zq * batch_size),
                               device=device,
                               requires_grad=True)
            d_real = netD(noise)
            d_fake = netD(codes)
            d_real_loss = adversarial_loss(d_real, label.fill_(real_label))
            d_real_loss.backward(retain_graph=True)
            d_fake_loss = adversarial_loss(d_fake, label.fill_(fake_label))
            d_fake_loss.backward(retain_graph=True)
            d_loss = d_real_loss + d_fake_loss
            # gp = calc_gradient_penalty(netD, noise, codes, device=device)
            # d_loss = d_fake.mean() - d_real.mean() + 10 * gp
            # d_loss.backward(retain_graph=True)
            dgrad_meter.update(model_grad_norm(netD))
            d_loss_meter.update(d_loss.item())
            d_acc_meter.update((sum(d_real < 0.5) + sum(d_fake > 0.5)).item() /
                               (generator_count * 2))
            # gp_meter.update(gp.item())

            optimD.step()
            # schedD.batch_step()
            # Train the generator
            freeze_params([netD])
            free_params([netH])

            # fool the discriminator
            # d_fake_loss = -d_fake.mean()
            # d_fake_loss.backward()

            d_fake_loss = adversarial_loss(d_fake, label.fill_(real_label))
            d_fake_loss.backward(retain_graph=True)

            optimH.step()

            with torch.no_grad():
                """ Update Statistics """
                if batch_idx % 50 == 0:
                    current_time = time.time()
                    ops_per_sec = ops // (current_time - start_time)
                    start_time = current_time
                    ops = 0
                    print("*" * 70 + " " + name)
                    print("{}/{} D Loss: {}".format(epoch, batch_idx,
                                                    d_loss.item()))
                    print("{} ops/s".format(ops_per_sec))

                ops += batch_size

                if batch_idx > 1 and batch_idx % 199 == 0:
                    if not dry:
                        sw.add_scalar('G/loss', g_loss_meter.avg, n_iter)
                        sw.add_scalar('D/loss', d_loss_meter.avg, n_iter)
                        sw.add_scalar('D/acc', d_acc_meter.avg, n_iter)
                        sw.add_scalar('D/gp', gp_meter.avg, n_iter)
                        sw.add_scalar('D/gradnorm', dgrad_meter.avg, n_iter)
                        netH.eval()
                        netH_samples = [
                            netH(fast_randn((batch_size, ze)).cuda())
                            for _ in range(10)
                        ]
                        netH.train()
                        sw.add_scalar(
                            'G/g_var',
                            sum(
                                x.std(0).mean() for v in netH_samples
                                for x in v[1].values()) /
                            (generator_count * 10), n_iter)
                        sw.add_scalar(
                            'G/q_var',
                            torch.cat([
                                s[0].view(-1, zq) for s in netH_samples
                            ]).var(0).mean(), n_iter)

                        if kwargs['embeddings']:
                            sw.add_embedding(
                                q.view(-1, zq),
                                global_step=n_iter,
                                tag="q",
                                metadata=list(range(generator_count)) *
                                batch_size)

                        torch.save(
                            {
                                'netH': netH.state_dict(),
                                'netD': netD.state_dict()
                            }, str(model_path / 'pretrain.pt'))
Exemplo n.º 14
0
    def optimization_algorithms(SCI_optimizer, cnn, LR, SCI_SGD_MOMENTUM,
                                REGULARIZATION):

        if type(SCI_optimizer) is str:
            if (SCI_optimizer == 'Adam'):
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'AMSGrad'):
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION,
                                       amsgrad=True)
            if (SCI_optimizer == 'AdamW'):
                optimizer = AdamW(cnn.parameters(),
                                  lr=LR,
                                  betas=(0.01, 0.999),
                                  weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'RMSprop'):
                optimizer = optim.RMSprop(cnn.parameters(), lr=LR)
            #if (SCI_optimizer == 'SparseAdam') or (int(SCI_optimizer) == 4) :
            #optimizer = optim.SparseAdam(cnn.parameters(), lr=LR)
            if (SCI_optimizer == 'SGD'):
                optimizer = optim.SGD(cnn.parameters(),
                                      lr=LR,
                                      momentum=SCI_SGD_MOMENTUM,
                                      weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'Adadelta'):
                optimizer = optim.Adadelta(cnn.parameters(),
                                           lr=LR,
                                           weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'Rprop'):
                optimizer = optim.Rprop(cnn.parameters(), lr=LR)
            #if (SCI_optimizer == 'Adagrad') or (int(SCI_optimizer) == 7) :
            #    optimizer = optim.Adagrad(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'Adamax'):
                optimizer = optim.Adamax(cnn.parameters(),
                                         lr=LR,
                                         weight_decay=REGULARIZATION)
            if (SCI_optimizer == 'ASGD'):
                optimizer = optim.ASGD(cnn.parameters(),
                                       lr=LR,
                                       weight_decay=REGULARIZATION)
            #if (SCI_optimizer == 'LBFGS') or (int(SCI_optimizer) == 10) :
            #optimizer = optim.LBFGS(cnn.parameters(), lr=LR)
        else:
            if (int(SCI_optimizer) == 1):
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 2):
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION,
                                       amsgrad=True)
            if (int(SCI_optimizer) == 3):
                optimizer = AdamW(cnn.parameters(),
                                  lr=LR,
                                  betas=(0.01, 0.999),
                                  weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 4):
                optimizer = optim.RMSprop(cnn.parameters(), lr=LR)
            #if (SCI_optimizer == 'SparseAdam') or (int(SCI_optimizer) == 4) :
            #optimizer = optim.SparseAdam(cnn.parameters(), lr=LR)
            if (int(SCI_optimizer) == 5):
                optimizer = optim.SGD(cnn.parameters(),
                                      lr=LR,
                                      momentum=SCI_SGD_MOMENTUM,
                                      weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 6):
                optimizer = optim.Adadelta(cnn.parameters(),
                                           lr=LR,
                                           weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 7):
                optimizer = optim.Rprop(cnn.parameters(), lr=LR)
            #if (SCI_optimizer == 'Adagrad') or (int(SCI_optimizer) == 7) :
            #    optimizer = optim.Adagrad(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 8):
                optimizer = optim.Adamax(cnn.parameters(),
                                         lr=LR,
                                         weight_decay=REGULARIZATION)
            if (int(SCI_optimizer) == 9):
                optimizer = optim.ASGD(cnn.parameters(),
                                       lr=LR,
                                       weight_decay=REGULARIZATION)
            #if (SCI_optimizer == 'LBFGS') or (int(SCI_optimizer) == 10) :
            #optimizer = optim.LBFGS(cnn.parameters(), lr=LR)

        return optimizer
Exemplo n.º 15
0
	else:
		print('Received: {}'.format(architecture))
		raise Exception('Model must be one of: Transformer_features, LSTM_raw, LSTM_features, CNN_raw, CNN_features, SincNet_raw')
		

if cuda:
	cost = cost.cuda()
	model = model.cuda()

print('FunTimes: {:d} parameters'.format(sum(p.numel() for p in model.parameters())))

# Instantiate optimizer and learning rate scheduler
if optimizer_to_use == 'AMSGrad':
	optimizer = optim.Adam(model.parameters(), lr, weight_decay=weight_decay, amsgrad=True)
elif optimizer_to_use == 'AdamW':
	optimizer = AdamW(model.parameters(), lr, weight_decay=weight_decay)
elif optimizer_to_use == 'Adam':
	optimizer = optim.Adam(model.parameters(), lr, weight_decay=weight_decay)
elif optimizer_to_use == 'RMSProp':
	optimizer = optim.RMSprop(model.parameters(), lr,alpha=0.95, eps=1e-8, weight_decay=weight_decay) 
else:
	print('Optimizer selected: {}'.format(optimizer_to_use))
	raise Exception('Optimizer must be one of: AMSGrad, AdamW, Adam, RMSProp')

# Load last checkpoint if one exists
subprocess.call(['gsutil', 'cp', ''])
state_dict = load_checkpoint(save_dir, restore_file, model, optimizer)
last_epoch = state_dict['last_epoch'] if state_dict is not None else -1
	
# Track validation performance for early stopping
Exemplo n.º 16
0
def main():
    args = cfg.parse_args()
    torch.cuda.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)
    np.random.seed(args.random_seed)
    random.seed(args.random_seed)
    torch.backends.cudnn.deterministic = True

    # set tf env
    _init_inception()
    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)

    # import network
    gen_net = eval('models.' + args.gen_model + '.Generator')(args=args).cuda()
    dis_net = eval('models.' + args.dis_model +
                   '.Discriminator')(args=args).cuda()
    gen_net.set_arch(args.arch, cur_stage=2)

    # weight init
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            if args.init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, 0.02)
            elif args.init_type == 'orth':
                nn.init.orthogonal_(m.weight.data)
            elif args.init_type == 'xavier_uniform':
                nn.init.xavier_uniform_(m.weight.data, 1.)
            else:
                raise NotImplementedError('{} unknown init type'.format(
                    args.init_type))
        elif classname.find('BatchNorm2d') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0.0)

    gen_net.apply(weights_init)
    dis_net.apply(weights_init)

    gpu_ids = [i for i in range(int(torch.cuda.device_count()))]
    gen_net = torch.nn.DataParallel(gen_net.to("cuda:0"), device_ids=gpu_ids)
    dis_net = torch.nn.DataParallel(dis_net.to("cuda:0"), device_ids=gpu_ids)

    gen_net.module.cur_stage = 0
    dis_net.module.cur_stage = 0
    gen_net.module.alpha = 1.
    dis_net.module.alpha = 1.

    # set optimizer
    if args.optimizer == "adam":
        gen_optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, gen_net.parameters()), args.g_lr,
            (args.beta1, args.beta2))
        dis_optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, dis_net.parameters()), args.d_lr,
            (args.beta1, args.beta2))
    elif args.optimizer == "adamw":
        gen_optimizer = AdamW(filter(lambda p: p.requires_grad,
                                     gen_net.parameters()),
                              args.g_lr,
                              weight_decay=args.wd)
        dis_optimizer = AdamW(filter(lambda p: p.requires_grad,
                                     dis_net.parameters()),
                              args.d_lr,
                              weight_decay=args.wd)
    gen_scheduler = LinearLrDecay(gen_optimizer, args.g_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)
    dis_scheduler = LinearLrDecay(dis_optimizer, args.d_lr, 0.0, 0,
                                  args.max_iter * args.n_critic)

    # fid stat
    if args.dataset.lower() == 'cifar10':
        fid_stat = 'fid_stat/fid_stats_cifar10_train.npz'
    elif args.dataset.lower() == 'stl10':
        fid_stat = 'fid_stat/stl10_train_unlabeled_fid_stats_48.npz'
    elif args.fid_stat is not None:
        fid_stat = args.fid_stat
    else:
        raise NotImplementedError(f'no fid stat for {args.dataset.lower()}')
    assert os.path.exists(fid_stat)

    # epoch number for dis_net
    args.max_epoch = args.max_epoch * args.n_critic
    dataset = datasets.ImageDataset(args, cur_img_size=8)
    train_loader = dataset.train
    if args.max_iter:
        args.max_epoch = np.ceil(args.max_iter * args.n_critic /
                                 len(train_loader))

    # initial
    fixed_z = torch.cuda.FloatTensor(
        np.random.normal(0, 1, (64, args.latent_dim)))
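    # keep a running (EMA-style) copy of the generator weights; it is swapped in
    # for validation and stored in the checkpoint as avg_gen_state_dict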
    gen_avg_param = copy_params(gen_net)
    start_epoch = 0
    best_fid = 1e4

    # set writer
    if args.load_path:
        print(f'=> resuming from {args.load_path}')
        checkpoint_file = args.load_path
        assert os.path.exists(checkpoint_file)
        checkpoint = torch.load(checkpoint_file)
        start_epoch = checkpoint['epoch']
        best_fid = checkpoint['best_fid']
        gen_net.load_state_dict(checkpoint['gen_state_dict'])
        dis_net.load_state_dict(checkpoint['dis_state_dict'])
        gen_optimizer.load_state_dict(checkpoint['gen_optimizer'])
        dis_optimizer.load_state_dict(checkpoint['dis_optimizer'])
        avg_gen_net = deepcopy(gen_net)
        avg_gen_net.load_state_dict(checkpoint['avg_gen_state_dict'])
        gen_avg_param = copy_params(avg_gen_net)
        del avg_gen_net
        cur_stage = cur_stages(start_epoch, args)
        gen_net.module.cur_stage = cur_stage
        dis_net.module.cur_stage = cur_stage
        gen_net.module.alpha = 1.
        dis_net.module.alpha = 1.

        # args.path_helper = checkpoint['path_helper']

    else:
        # create new log dir
        assert args.exp_name
    args.path_helper = set_log_dir('logs', args.exp_name)
    logger = create_logger(args.path_helper['log_path'])

    logger.info(args)
    writer_dict = {
        'writer': SummaryWriter(args.path_helper['log_path']),
        'train_global_steps': start_epoch * len(train_loader),
        'valid_global_steps': start_epoch // args.val_freq,
    }

    # train loop
    for epoch in tqdm(range(int(start_epoch), int(args.max_epoch)),
                      desc='total progress'):
        lr_schedulers = (gen_scheduler,
                         dis_scheduler) if args.lr_decay else None
        train(args, gen_net, dis_net, gen_optimizer, dis_optimizer,
              gen_avg_param, train_loader, epoch, writer_dict, lr_schedulers)

        if (epoch and epoch % args.val_freq == 0) or epoch == int(args.max_epoch) - 1:
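            # evaluate with the averaged generator weights, then restore the raw weights afterwards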
            backup_param = copy_params(gen_net)
            load_params(gen_net, gen_avg_param)
            inception_score, fid_score = validate(args, fixed_z, fid_stat,
                                                  epoch, gen_net, writer_dict)
            logger.info(
                f'Inception score: {inception_score}, FID score: {fid_score} || @ epoch {epoch}.'
            )
            load_params(gen_net, backup_param)
            if fid_score < best_fid:
                best_fid = fid_score
                is_best = True
            else:
                is_best = False
        else:
            is_best = False

        avg_gen_net = deepcopy(gen_net)
        load_params(avg_gen_net, gen_avg_param)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'gen_model': args.gen_model,
                'dis_model': args.dis_model,
                'gen_state_dict': gen_net.state_dict(),
                'dis_state_dict': dis_net.state_dict(),
                'avg_gen_state_dict': avg_gen_net.state_dict(),
                'gen_optimizer': gen_optimizer.state_dict(),
                'dis_optimizer': dis_optimizer.state_dict(),
                'best_fid': best_fid,
                'path_helper': args.path_helper
            }, is_best, args.path_helper['ckpt_path'])
        del avg_gen_net
Exemplo n.º 17
0
    def objective(SCI_RELU, SCI_BIAS, SCI_loss_type, SCI_optimizer, SCI_BATCH_SIZE, SCI_MM, SCI_REGULARIZATION, SCI_LR, SCI_DROPOUT, SCI_L_SECOND, SCI_EPOCHS, SCI_BN_MOMENTUM, SCI_SGD_MOMENTUM, SCI_LINEARITY):
        global device, MaxCredit  
        global count, CreditVector, CreditVec
        
        SCI_BATCH_SIZE = int(SCI_BATCH_SIZE)  # integer between 4 and 256
        SCI_MM = round(SCI_MM, 3)  # real with three decimals between (0.001, 0.999)
        SCI_REGULARIZATION = round(SCI_REGULARIZATION, 3)  # real with three decimals between (0.001, 0.7)
        SCI_LR = round(SCI_LR, 5)  # real with five decimals between(1e-4, 7e-1)            
        SCI_DROPOUT = round(SCI_DROPOUT, 2)  # real with two decimals between (0, 0.4)
        SCI_L_SECOND = int(SCI_L_SECOND)  # integer between 2 and 64
        SCI_EPOCHS = int(SCI_EPOCHS)  # integer between (100, 500)
        SCI_BN_MOMENTUM = round(SCI_BN_MOMENTUM, 2)  # real with two decimals between (0, 0.99)
        SCI_SGD_MOMENTUM = round(SCI_SGD_MOMENTUM, 2)  # real with two decimals between (0, 0.99) 
        SCI_optimizer = int(SCI_optimizer)  # integer between 1 and 4
        SCI_loss_type = int(SCI_loss_type)  # integer between 1 and 3 ('CrossEntropyLoss', 'MultiMarginLoss','NLLLoss')
        SCI_LINEARITY = int(SCI_LINEARITY)
        if int(SCI_RELU) == 1 :  # integer between 1 and 2 ('True', 'False')
            SCI_RELU = True      
        else:
            SCI_RELU = False      
        if int(SCI_BIAS) == 1 :  # integer between 1 and 2 ('True', 'False')
            SCI_BIAS = True      
        else:
            SCI_BIAS = False  
               
        from cnn_model import CNN6
        cnn = CNN6(L_FIRST, SCI_L_SECOND, KERNEL_X, SCI_BIAS, SCI_BN_MOMENTUM, SCI_RELU, SCI_DROPOUT, dataset.CLASSES, SCI_LINEARITY)     
    
        if GPU_SELECT == 2:
            if torch.cuda.device_count() > 1:
                cnn = nn.DataParallel(cnn, device_ids=[0, 1], dim=0) 
            cnn = cnn.cuda()                
        if GPU_SELECT == 1:
            cnn.to(device)  
        if GPU_SELECT == 0:
            cnn.to(device)        

        # next(cnn.parameters()).is_cuda
        # print(cnn)  # net architecture   
        # list(cnn.parameters()) 
        cnn.apply(CNN6.weights_reset)        
        cnn.share_memory()
     
        loss_func = nn.CrossEntropyLoss()

        def create_loss(LOSS):
            if LOSS == 1:
                loss_func = nn.CrossEntropyLoss()
            elif LOSS == 2:
                loss_func = nn.NLLLoss()
            else:
                loss_func = nn.MultiMarginLoss()
            return loss_func

        MM = float(str(SCI_MM))
        REGULARIZATION = float(str(SCI_REGULARIZATION))
        # optimizer = str(SCI_optimizer)
        LR = float(str(SCI_LR))
        train_losses = []  # to track the training loss as the model trains
        output = 0
        loss = 0
        accuracy = 0
        early_stopping.counter = 0
        early_stopping.best_score = None
        early_stopping.early_stop = False
        early_stopping.verbose = False  
        TEST_RESULTS = torch.zeros(1, 2)
    
        loss_func = create_loss(SCI_loss_type)  # use the selected loss in the training loop below
    
        from adamw import AdamW
        
        if SCI_optimizer == 1:
            optimizer = optim.Adam(cnn.parameters(), lr=LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION)
        if SCI_optimizer == 2:
            optimizer = optim.Adam(cnn.parameters(), lr=LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION, amsgrad=True)
        if SCI_optimizer == 3:
            optimizer = AdamW(cnn.parameters(), lr=LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION)           
        if SCI_optimizer == 4:
            optimizer = optim.SGD(cnn.parameters(), lr=LR, momentum=SCI_SGD_MOMENTUM, weight_decay=REGULARIZATION)
        if SCI_optimizer == 5:
            optimizer = optim.Adadelta(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION)
        if SCI_optimizer == 6:
            optimizer = optim.Adagrad(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION)
    
        from Utillities import Utillities
        Utillities.listing(optimizer, SCI_SGD_MOMENTUM, SCI_BN_MOMENTUM, SCI_L_SECOND, SCI_LR, SCI_RELU, SCI_BIAS, SCI_loss_type, REGULARIZATION, SCI_BATCH_SIZE, SCI_DROPOUT, SCI_LINEARITY)
    
        # Data Loader for easy mini-batch return in training
        SCI_BATCH_SIZE = int(SCI_BATCH_SIZE)
        train_loader = Data.DataLoader(dataset=dataset.train_dataset, batch_size=SCI_BATCH_SIZE, shuffle=True, num_workers=0, drop_last=True, pin_memory=True)
        validation_loader = Data.DataLoader(dataset=dataset.validation_dataset, batch_size=144, shuffle=True, num_workers=0, drop_last=True, pin_memory=True)    
        test_loader = Data.DataLoader(dataset=dataset.test_dataset, batch_size=599, shuffle=True, num_workers=0, drop_last=True, pin_memory=True)
    
        for epoch in range(SCI_EPOCHS):
            loss = None        
            cnn.train().cuda()
            for step, (train_data, train_target) in enumerate(train_loader):   
                train_data, train_target = train_data.to(device), train_target.to(device)
                output, temp = cnn(train_data)  # forward pass: compute predicted outputs by passing inputs to the model     
                loss = loss_func(output, train_target)
                train_losses.append(loss.item())  # record training loss 
                optimizer.zero_grad()  # clear gradients accumulated from the previous step
                loss.backward()  # backward pass: compute gradient of the loss with respect to model parameters
                optimizer.step()  # perform a single optimization step (parameter update)
      
            cnn.eval().cuda()  # switch to evaluation (no change) mode           
            valid_loss = 0
            accuracy = 0
            running_loss = 0.0
            with torch.no_grad():
                for step, (validation_data, validation_target) in enumerate(validation_loader):
                    validation_data, validation_target = validation_data.to(device), validation_target.to(device)
                    output, temp = cnn(validation_data)  # forward pass: compute predicted outputs by passing inputs to the model
                    valid_loss += loss_func(output, validation_target).item()
                    # ps = torch.exp(output)
                    # equality = (validation_target[0].data == ps.max(dim=1)[1])
                    # accuracy += equality.type(torch.FloatTensor).mean()    
                    # print('valid_loss: ', valid_loss)
                    
                # print statistics
                running_loss += valid_loss
                if epoch % 100 == 0:
                    print('validation loss: %.6f' % running_loss)
                    running_loss = 0.0
                   
            train_losses = []
            early_stopping(valid_loss, cnn)
        
            if early_stopping.early_stop:
                if os.path.exists('checkpoint.pt'):
                    # cnn = TheModelClass(*args, **kwargs)
                    print("Loaded the model with the lowest Validation Loss!")
                    cnn.load_state_dict(torch.load('checkpoint.pt'))  # Choose whatever GPU device number you want
                    cnn.to(device)
                break
      
        cnn.eval()
        class_correct = list(0. for i in range(1000))
        class_total = list(0. for i in range(1000))
        with torch.no_grad():
            for (test_data, test_target) in test_loader:
                test_data, test_target = test_data.to(device), test_target.to(device)
                outputs, temp = cnn(test_data)
                _, predicted = torch.max(outputs, 1)
                c = (predicted == test_target).squeeze()
                for i in range(test_target.size(0)):
                    label = test_target[i]
                    class_correct[label] += c[i].item()
                    class_total[label] += 1

        for i in range(dataset.CLASSES):
            TEST_RESULTS[0, i] = class_correct[i] / dataset.TESTED_ELEMENTS[i]
            print('Class: ', i, ' accuracy: ', TEST_RESULTS[0, i])   
            print('Class: ', i, ' correct: ', class_correct[i], ' of ', dataset.TESTED_ELEMENTS[i])
        percent = (TEST_RESULTS[0, 0] + TEST_RESULTS[0, 1]) / 2
        print('Final percentage: ', percent)
    
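        # asymmetric misclassification cost: errors on class 1 are weighted five times as heavily as errors on class 0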
        CreditCost = int((1 - TEST_RESULTS[0, 0]) * dataset.TESTED_ELEMENTS[0] + (1 - TEST_RESULTS[0, 1]) * dataset.TESTED_ELEMENTS[1] * 5)
        
        if TEST_RESULTS[0, 0] == 0 or TEST_RESULTS[0, 1] == 0 :
            CreditCost = CreditCost + 300
    
        print('Last epoch: ', epoch)
        print('Credit Cost: ', -CreditCost)
        # list(cnn.parameters())
    
        if os.path.exists('checkpoint.pt'):  
            os.remove('checkpoint.pt') 

        print()
        
        print()
        
        if -CreditCost > MaxCredit : 
            MaxCredit = -CreditCost
        print('Best Score So Far: ', MaxCredit)    
        
        CreditVector[count] = MaxCredit    
        CreditVec[count] = count
        # plot the data
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(CreditVec, -CreditVector, color='tab:blue')
        # print(CreditVec, -CreditVector)
        count = count + 1
        # display the plot
        plt.show()
        
        return -CreditCost
Exemplo n.º 18
0
                                   batch_size=batch_size,
                                   num_workers=6,
                                   shuffle=True,
                                   pin_memory=False,
                                   drop_last=True)
    val_data_loader = DataLoader(val_train,
                                 batch_size=val_batch_size,
                                 num_workers=6,
                                 shuffle=False,
                                 pin_memory=False)

    model = SeResNext50_Unet_Double().cuda()

    params = model.parameters()

    optimizer = AdamW(params, lr=0.00001, weight_decay=1e-6)

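    # NVIDIA Apex automatic mixed precision; O1 patches ops to run in fp16 where it is safe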
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=[
                                             1, 2, 3, 4, 5, 7, 9, 11, 17, 23,
                                             29, 33, 47, 50, 60, 70, 90, 110,
                                             130, 150, 170, 180, 190
                                         ],
                                         gamma=0.5)

    model = nn.DataParallel(model).cuda()

    snap_to_load = 'res50_cls_cce_{}_0_best'.format(seed)
    print("=> loading checkpoint '{}'".format(snap_to_load))
Exemplo n.º 19
0
    def objective(SCI_SGD_MOMENTUM, SCI_DROPOUT, SCI_BATCH_SIZE, SCI_L_SECOND, SCI_optimizer, LINEARITY):
        global SCI_REGULARIZATION, SCI_EPOCHS, SCI_loss_type, SCI_RELU
        global SCI_BIAS, SCI_BN_MOMENTUM, device, SCI_LR, MaxCredit, count, CreditVector, CreditVec
        
        SCI_SGD_MOMENTUM = SCI_SGD_MOMENTUM / 10
        DROPOUT = (SCI_DROPOUT / 2).item()
        if SCI_DROPOUT < 0 :
            DROPOUT = 0

        BATCH_SIZE = int(SCI_BATCH_SIZE)
        
        if SCI_L_SECOND < 4 :
            SCI_L_SECOND = 4
            
        if SCI_optimizer < 1 :
            SCI_optimizer = 1
        
        L_SECOND = int(SCI_L_SECOND)
        
        loss_func = nn.CrossEntropyLoss()

        def create_loss(LOSS):
            if LOSS == 'CrossEntropyLoss':
                loss_func = nn.CrossEntropyLoss()
            elif LOSS == 'NLLLoss':
                loss_func = nn.NLLLoss()
            else:
                loss_func = nn.MultiMarginLoss()
            return loss_func

        REGULARIZATION = float(str(SCI_REGULARIZATION))
        optimizer1 = str(SCI_optimizer)

        from cnn_model import CNN6      
        cnn = CNN6(L_FIRST, L_SECOND, KERNEL_X, SCI_BIAS, SCI_BN_MOMENTUM, SCI_RELU, DROPOUT, dataset.CLASSES, LINEARITY)     
    
        if GPU_SELECT == 2:
            if torch.cuda.device_count() > 1:
                cnn = nn.DataParallel(cnn, device_ids=[0, 1], dim=0) 
            cnn = cnn.cuda()
        if GPU_SELECT == 1:
            cnn.to(device)  
        if GPU_SELECT == 0:
            cnn.to(device)        

        cnn.apply(CNN6.weights_reset)
        cnn.share_memory()

        train_losses = []  # to track the training loss as the model trains
        output = 0
        loss = 0
        accuracy = 0
        early_stopping.counter = 0
        early_stopping.best_score = None
        early_stopping.early_stop = False
        early_stopping.verbose = False  
        TEST_RESULTS = torch.zeros(1, 2)
    
        loss_func = create_loss(SCI_loss_type)  # use the selected loss in the training loop below
        
        from adamw import AdamW
        
        if optimizer1 == '1':
            optimizer = optim.Adam(cnn.parameters(), lr=SCI_LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION)
        if optimizer1 == '2':
            optimizer = optim.Adam(cnn.parameters(), lr=SCI_LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION, amsgrad=True)
        if optimizer1 == '3':
            optimizer = AdamW(cnn.parameters(), lr=SCI_LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION)            
        if optimizer1 == '4':
            optimizer = optim.SGD(cnn.parameters(), lr=SCI_LR, momentum=SCI_SGD_MOMENTUM, weight_decay=REGULARIZATION)
        if optimizer1 == '5':
            optimizer = optim.Adadelta(cnn.parameters(), lr=SCI_LR, weight_decay=REGULARIZATION)
        if optimizer1 == '6':
            optimizer = optim.Adagrad(cnn.parameters(), lr=SCI_LR, weight_decay=REGULARIZATION)
        if optimizer1 > '6':           
            optimizer = optim.Adam(cnn.parameters(), lr=SCI_LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION)
    
        from Utillities import Utillities
        Utillities.listing(optimizer, SCI_SGD_MOMENTUM, SCI_BN_MOMENTUM, L_SECOND, SCI_LR, SCI_RELU, SCI_BIAS, SCI_loss_type, REGULARIZATION, BATCH_SIZE, DROPOUT, LINEARITY)

        train_loader = Data.DataLoader(dataset=dataset.train_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, drop_last=True, pin_memory=True)
        validation_loader = Data.DataLoader(dataset=dataset.validation_dataset, batch_size=144, shuffle=False, num_workers=0, drop_last=True, pin_memory=True)    
        test_loader = Data.DataLoader(dataset=dataset.test_dataset, batch_size=599, shuffle=False, num_workers=0, pin_memory=True, drop_last=True)
    
        for epoch in range(SCI_EPOCHS):
            loss = None        
            cnn.train().cuda()
            for step, (train_data, train_target) in enumerate(train_loader):   
                train_data, train_target = train_data.to(device), train_target.to(device)
                output, temp = cnn(train_data)  # forward pass: compute predicted outputs by passing inputs to the model     
                loss = loss_func(output, train_target)
                train_losses.append(loss.item())  # record training loss 
                optimizer.zero_grad()  # clear gradients accumulated from the previous step
                loss.backward()  # backward pass: compute gradient of the loss with respect to model parameters
                optimizer.step()  # perform a single optimization step (parameter update)
      
            cnn.eval().cuda()  # switch to evaluation (no change) mode           
            valid_loss = 0
            accuracy = 0
            with torch.no_grad():
                for step, (validation_data, validation_target) in enumerate(validation_loader):
                    validation_data, validation_target = validation_data.to(device), validation_target.to(device)
                    output, temp = cnn(validation_data)  # forward pass: compute predicted outputs by passing inputs to the model
                    valid_loss += loss_func(output, validation_target).item()
                    ps = torch.exp(output)
                    equality = (validation_target[0].data == ps.max(dim=1)[1])
                    accuracy += equality.type(torch.FloatTensor).mean()      
               
            train_losses = []
            early_stopping(valid_loss, cnn)
       
            if early_stopping.early_stop:
                if os.path.exists('checkpoint.pt'):
                    print("Loaded the model with the lowest Validation Loss!")
                    cnn.load_state_dict(torch.load('checkpoint.pt', map_location="cuda:1"))  # Choose whatever GPU device number you want
                    cnn.to(device)
                break
      
        cnn.eval()
        class_correct = list(0. for i in range(1000))
        class_total = list(0. for i in range(1000))
        with torch.no_grad():
            for (test_data, test_target) in test_loader:
                test_data, test_target = test_data.to(device), test_target.to(device)
                outputs, temp = cnn(test_data)
                _, predicted = torch.max(outputs, 1)
                c = (predicted == test_target).squeeze()
                for i in range(test_target.size(0)):
                    label = test_target[i]
                    class_correct[label] += c[i].item()
                    class_total[label] += 1

        for i in range(dataset.CLASSES):
            TEST_RESULTS[0, i] = class_correct[i] / dataset.TESTED_ELEMENTS[i]
            print('Class: ', i, ' accuracy: ', TEST_RESULTS[0, i])
            print('Class: ', i, ' correct: ', class_correct[i])
        percent = (TEST_RESULTS[0, 0] + TEST_RESULTS[0, 1]) / 2
        print('Final percentage: ', percent)
    
        CreditCost = (1 - TEST_RESULTS[0, 0]) * dataset.TESTED_ELEMENTS[0] + (1 - TEST_RESULTS[0, 1]) * dataset.TESTED_ELEMENTS[1] * 5
    
        if TEST_RESULTS[0, 0] == 0 or TEST_RESULTS[0, 1] == 0 :
            CreditCost = CreditCost + 300
    
        print('Last epoch: ', epoch)
   
        if os.path.exists('checkpoint.pt'):  
            os.remove('checkpoint.pt') 

        print()
        torch.cuda.empty_cache()
        print()
        
        CreditCost = CreditCost + (SCI_SGD_MOMENTUM + SCI_DROPOUT + SCI_BATCH_SIZE + SCI_L_SECOND + SCI_optimizer) / 1000
        print('Credit Cost: ', CreditCost)
        
        if -CreditCost > MaxCredit : 
            MaxCredit = -CreditCost
        print('Best Score So Far: ', MaxCredit)   
        
        CreditVector[count] = MaxCredit    
        CreditVec[count] = count
        # plot the data
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(CreditVec, -CreditVector, color='tab:orange')
        # print(CreditVec, -CreditVector)
        count = count + 1
        # display the plot
        plt.show()
             
        return CreditCost
Exemplo n.º 20
0
def main(config):
  seed_all()
  os.makedirs('cache', exist_ok=True)
  os.makedirs(config.logdir, exist_ok=True)
  print("Logging to: %s" % config.logdir)
  src_files = sorted(glob('*.py'))
  for src_fn in src_files:
    dst_fn = os.path.join(config.logdir, src_fn)
    copyfile(src_fn, dst_fn)

  train_image_fns = sorted(glob(os.path.join(config.train_dir, '*/*/*.dcm')))
  test_image_fns = sorted(glob(os.path.join(config.test_dir, '*/*/*.dcm')))

  # assert len(train_image_fns) == 10712
  # assert len(test_image_fns) == 1377

  gt = load_gt(config.train_rle)
  # create folds
  np.random.shuffle(train_image_fns)

  if config.subset > 0:
    train_image_fns = train_image_fns[:config.subset]

  folds = np.arange(len(train_image_fns)) % config.num_folds
  val_image_fns = [fn for k, fn in enumerate(train_image_fns)
      if folds[k] == config.fold]
  train_image_fns = [fn for k, fn in enumerate(train_image_fns)
      if folds[k] != config.fold]
  # remove not-used files:
  # https://www.kaggle.com/c/siim-acr-pneumothorax-segmentation/discussion/98478#latest-572385  # noqa
  train_image_fns = [fn for fn in train_image_fns
      if DicomDataset.fn_to_id(fn) in gt]
  val_image_fns = [fn for fn in val_image_fns
      if DicomDataset.fn_to_id(fn) in gt]

  print("VAL: ", len(val_image_fns), os.path.basename(val_image_fns[0]))
  print("TRAIN: ", len(train_image_fns), os.path.basename(train_image_fns[0]))

  train_ds = DicomDataset(train_image_fns, gt_rles=gt, augment=True)
  val_ds = DicomDataset(val_image_fns, gt_rles=gt)

  if config.cache:
    train_ds.cache()
    val_ds.cache()

  val_loader = data.DataLoader(val_ds, batch_size=config.batch_size,
                               shuffle=False, num_workers=config.num_workers,
                               pin_memory=config.pin, drop_last=False)

  model = FPNSegmentation(config.slug, ema=config.ema)
  if config.weight is not None:
    print("Loading: %s" % config.weight)
    model.load_state_dict(th.load(config.weight))
  model = model.to(config.device)

  no_decay = ['mean', 'std', 'bias'] + ['.bn%d.' % i for i in range(100)]
  grouped_parameters = [{'params': [], 'weight_decay': config.weight_decay},
      {'params': [], 'weight_decay': 0.0}]
  for n, p in model.named_parameters():
    if not any(nd in n for nd in no_decay):
      print("Decay: %s" % n)
      grouped_parameters[0]['params'].append(p)
    else:
      print("No Decay: %s" % n)
      grouped_parameters[1]['params'].append(p)
  optimizer = AdamW(grouped_parameters, lr=config.lr)

  if config.apex:
    model, optimizer = apex.amp.initialize(model, optimizer, opt_level="O1",
                                           verbosity=0)

  updates_per_epoch = len(train_ds) // config.batch_size
  num_updates = int(config.epochs * updates_per_epoch)
  scheduler = WarmupLinearSchedule(warmup=config.warmup, t_total=num_updates)

  # training loop
  smooth = 0.1
  best_dice = 0.0
  best_fn = None
  global_step = 0
  for epoch in range(1, config.epochs + 1):
    smooth_loss = None
    smooth_accuracy = None
    model.train()
    train_loader = data.DataLoader(train_ds, batch_size=config.batch_size,
                                   shuffle=True, num_workers=config.num_workers,
                                   pin_memory=config.pin, drop_last=True)
    progress = tqdm(total=len(train_ds), smoothing=0.01)
    for i, (X, _, y_true) in enumerate(train_loader):
      X = X.to(config.device).float()
      y_true = y_true.to(config.device)
      y_pred = model(X)
      loss = siim_loss(y_true, y_pred, weights=None)
      if config.apex:
        with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
          scaled_loss.backward()
      else:
        loss.backward()

      lr_this_step = None
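      # gradient accumulation: step the optimizer and update the learning rate only every accumulation_step batches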
      if (i + 1) % config.accumulation_step == 0:
        optimizer.step()
        optimizer.zero_grad()
        lr_this_step = config.lr * scheduler.get_lr(global_step, config.warmup)
        for param_group in optimizer.param_groups:
          param_group['lr'] = lr_this_step
        global_step += 1

      smooth_loss = loss.item() if smooth_loss is None else \
          smooth * loss.item() + (1. - smooth) * smooth_loss
      # print((y_true >= 0.5).sum().item())
      accuracy = th.mean(((y_pred >= 0.5) == (y_true == 1)).to(
          th.float)).item()
      smooth_accuracy = accuracy if smooth_accuracy is None else \
          smooth * accuracy + (1. - smooth) * smooth_accuracy
      progress.set_postfix(ep='%d/%d' % (epoch, config.epochs),
            loss='%.4f' % smooth_loss, accuracy='%.4f' %
            (smooth_accuracy), lr='%.6f' % (config.lr if lr_this_step is None
              else lr_this_step))
      progress.update(len(X))

    if epoch <= 12:
      continue
    # validation loop
    model.eval()
    thresholds = [0.1, 0.2]
    dice_coeffs = [[] for _ in range(len(thresholds))]
    progress = tqdm(enumerate(val_loader), total=len(val_loader))
    with th.no_grad():
      for i, (X, _, y_trues) in progress:
        X = X.to(config.device).float()
        y_trues = y_trues.to(config.device)
        y_preds = model(X)
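        # horizontal-flip test-time augmentation: average the prediction on the
        # original image with the re-flipped prediction on its mirror image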
        y_preds_flip = th.flip(model(th.flip(X, (-1, ))), (-1, ))
        y_preds = 0.5 * (y_preds + y_preds_flip)

        y_trues = y_trues.cpu().numpy()
        y_preds = y_preds.cpu().numpy()
        for yt, yp in zip(y_trues, y_preds):
          yt = (yt.squeeze() >= 0.5).astype('uint8')
          yp = yp.squeeze()
          for dind, threshold in enumerate(thresholds):
            yp_ = (yp >= threshold).astype(np.uint8)
            sc = score(yt, yp_)
            dice_coeffs[dind].append(sc)

    best_threshold_ind = -1
    dice_coeff = -1
    for dind, threshold in enumerate(thresholds):
      dc = np.mean([x[0] for x in dice_coeffs[dind] if x[1] == 'non-empty'])
      # progress.write("Dice @%.2f: %.4f" % (threshold, dc))
      if dc > dice_coeff:
        dice_coeff = dc
        best_threshold_ind = dind

    dice_coeffs = dice_coeffs[best_threshold_ind]
    num_empty = sum(1 for x in dice_coeffs if x[1] == 'empty')
    num_total = len(dice_coeffs)
    num_non_empty = num_total - num_empty
    empty_sum = np.sum([d[0] for d in dice_coeffs if d[1] == 'empty'])
    non_empty_sum = np.sum([d[0] for d in dice_coeffs if d[1] == 'non-empty'])
    dice_coeff_empty = empty_sum / num_empty
    dice_coeff_non_empty = non_empty_sum / num_non_empty
    progress.write('[Empty: %d]: %.3f | %.3f, [Non-Empty: %d]: %.3f | %.3f' % (
        num_empty, dice_coeff_empty, empty_sum / num_total,
        num_non_empty, dice_coeff_non_empty, non_empty_sum / num_total))
    dice_coeff = float(dice_coeff)
    summary_str = 'f%02d-ep-%04d-val_dice-%.4f@%.2f' % (config.fold, epoch,
        dice_coeff, thresholds[best_threshold_ind])
    progress.write(summary_str)
    if dice_coeff > best_dice:
      weight_fn = os.path.join(config.logdir, summary_str + '.pth')
      th.save(model.state_dict(), weight_fn)
      best_dice = dice_coeff
      best_fn = weight_fn
      fns = sorted(glob(os.path.join(config.logdir, 'f%02d-*.pth' %
          config.fold)))
      for fn in fns[:-config.n_keep]:
        os.remove(fn)

  # create submission
  test_ds = DicomDataset(test_image_fns)
  test_loader = data.DataLoader(test_ds, batch_size=config.batch_size,
                               shuffle=False, num_workers=0,
                               pin_memory=False, drop_last=False)
  if best_fn is not None:
    model.load_state_dict(th.load(best_fn))
  model.eval()
  sub = create_submission(model, test_loader, config, pred_zip=config.pred_zip)
  sub.to_csv(config.submission_fn, index=False)
  print("Wrote to: %s" % config.submission_fn)

  # create val submission
  val_fn = config.submission_fn.replace('.csv', '_VAL.csv')
  model.eval()
  sub = []
  sub = create_submission(model, val_loader, config,
      pred_zip=config.pred_zip.replace('.zip', '_VAL.zip'))
  sub.to_csv(val_fn, index=False)
  print("Wrote to: %s" % val_fn)
Exemplo n.º 21
0
def train(name, loader, checkpoint, num_rep, lr, beta1, gamma_gan, num_epochs,
          wd, device):
    discriminator = Discriminator().to(device)
    generator = Generator(num_rep).to(device)

    losses = {'D': [], 'G': []}

    optimizer_D = AdamW(discriminator.parameters(),
                        lr=lr,
                        weight_decay=wd,
                        betas=(beta1, 0.99))
    optimizer_G = AdamW(generator.parameters(),
                        lr=lr,
                        weight_decay=wd,
                        betas=(beta1, 0.99))

    bce = nn.BCELoss()
    mse = nn.MSELoss()
    normalizer = Normalizer(cfg.mean, cfg.std, device)

    if torch.cuda.device_count() > 1:
        generator = nn.DataParallel(generator)
        discriminator = nn.DataParallel(discriminator)

    save_path = Path('.') / 'save' / name
    if not save_path.is_dir():
        save_path.mkdir(parents=True)

    if checkpoint:
        losses = load_checkpoint(save_path, discriminator, generator,
                                 optimizer_D, optimizer_G)

    last_epoch = len(losses['D']) - 1
    logging.info('Last epoch={}'.format(last_epoch))

    for epoch in range(last_epoch + 1, num_epochs):
        losses_G = 0.0
        losses_D = 0.0
        loss_G_gan_acc = 0.0
        loss_G_M_acc = 0.0

        iter_count = 0

        for image, gt, _ in loader:
            batchsize = image.size(0)
            image, gt = image.to(device), gt.to(device)

            # Phase 1: train the discriminator
            discriminator.zero_grad()
            labels = torch.full((batchsize, 1), 1.0, device=device)
            output = discriminator(gt)
            D_x = output.mean().item()
            loss_D_real = bce(output, labels)
            loss_D_real.backward()

            fake = generator(image)
            fake = normalizer(fake)
            labels.fill_(0)
            output = discriminator(fake.detach())
            D_G_z1 = output.mean().item()
            loss_D_fake = bce(output, labels)
            loss_D_fake.backward()

            loss_D = loss_D_real.item() + loss_D_fake.item()
            optimizer_D.step()

            # Phase 2: train the generator

            generator.zero_grad()
            output = discriminator(fake)
            D_G_z2 = output.mean().item()
            labels.fill_(1)
            loss_G_gan = bce(output, labels)
            loss_G_gan_acc += loss_G_gan.item()

            loss_G_M = mse(fake, gt)
            loss_G_M_acc += loss_G_M.item()

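            # total generator loss: adversarial term weighted by gamma_gan plus pixel-wise MSE reconstruction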
            loss_G = gamma_gan * loss_G_gan + loss_G_M

            loss_G.backward()
            optimizer_G.step()

            losses_D += loss_D
            losses_G += loss_G.item()
            if iter_count % 20 == 0:
                logging.info(
                    "Iteration {} loss -- Loss D {:.4f}, "
                    "Loss G {:.4f}, D(x) {:.4f} D(g(z)) {:.4f} / {:.4f}".
                    format(iter_count, loss_D, loss_G, D_x, D_G_z1, D_G_z2))

            iter_count += 1

        logging.info("D Loss: {:.4f}, G Loss: {:.4f} at epoch {}.".format(
            losses_D, losses_G, epoch))
        logging.info('loss_G_gan_acc={:.4f}, loss_G_M_acc={:.4f}'.format(
            loss_G_gan_acc, loss_G_M_acc))
        losses['D'].append(losses_D)
        losses['G'].append(losses_G)

        if checkpoint:
            save_checkpoint(save_path, discriminator, generator, optimizer_D,
                            optimizer_G, losses)
Exemplo n.º 22
0
    def build_model(self):
        x1, x2, s1, s2 = self.x1_in, self.x2_in, self.s1_in, self.s2_in
        self.x_mask = Lambda(
            lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x1)

        x = self.bert_pretrained_model([x1, x2])

        # BiGRU + DNN
        #  # from https://github.com/hecongqing/CCKS2019EventEntityExtraction_Rank5/blob/master/src/SEBERT_model.py
        #  l = Lambda(lambda t: t[:, -1])(x)
        #  x = Add()([x, l])
        #  x = Dropout(0.1)(x)
        #  x = Lambda(lambda x: x[0] * x[1])([x, x_mask])
        #
        #  x = SpatialDropout1D(0.1)(x)
        #  x = Bidirectional(CuDNNGRU(200, return_sequences=True))(x)
        #  x = Lambda(lambda x: x[0] * x[1])([x, x_mask])
        #  x = Bidirectional(CuDNNGRU(200, return_sequences=True))(x)
        #  x = Lambda(lambda x: x[0] * x[1])([x, x_mask])
        #
        #  x = Dense(1024, use_bias=False, activation='tanh')(x)
        #  x = Dropout(0.2)(x)
        #  x = Dense(64, use_bias=False, activation='tanh')(x)
        #  x = Dropout(0.2)(x)
        #  x = Dense(8, use_bias=False, activation='tanh')(x)

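        # start / end position logits; padded positions are pushed to -1e10 so the
        # softmax over sequence positions effectively ignores them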
        ps1 = Dense(1, use_bias=False)(x)
        ps1 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)(
            [ps1, self.x_mask])
        ps2 = Dense(1, use_bias=False)(x)
        ps2 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)(
            [ps2, self.x_mask])

        self.predict_model = Model([self.x1_in, self.x2_in], [ps1, ps2])

        train_model = Model([self.x1_in, self.x2_in, self.s1_in, self.s2_in],
                            [ps1, ps2])
        if Config.gpus > 1:
            train_model = multi_gpu_model(train_model, gpus=Config.gpus)

        def get_loss(y_true, y_pred, with_weights=False):
            weights = 0.0
            if with_weights:
                # weight each example by the distance between the true and predicted label positions
                i_true = K.argmax(y_true, axis=1)
                i_pred = K.argmax(y_pred, axis=1)
                distance = K.abs(i_true - i_pred)
                weights = K.cast(distance, dtype='float32')
                #  length = K.int_shape(y_true)[1] - 1
                #  weights = K.cast(distance / length, dtype='float32')

            losses = (1.0 + weights) * K.categorical_crossentropy(
                y_true, y_pred, from_logits=True)
            #  losses = (
            #      (1.0 + weights) *
            #      K.categorical_crossentropy(y_true, y_pred, from_logits=True))
            loss = K.mean(losses)
            return loss

        #  loss1 = K.mean(
        #      K.categorical_crossentropy(self.s1_in, ps1, from_logits=True))
        #  ps2 -= (1 - K.cumsum(s1, 1)) * 1e10
        #  loss2 = K.mean(
        #      K.categorical_crossentropy(self.s2_in, ps2, from_logits=True))
        #  self.loss = loss1 + loss2
        loss1 = get_loss(self.s1_in, ps1, with_weights=True)
        ps2 -= (1 - K.cumsum(s1, 1)) * 1e10
        loss2 = get_loss(self.s2_in, ps2, with_weights=True)
        self.loss = loss1 + loss2

        train_model.add_loss(self.loss)

        if 'COLAB_TPU_ADDR' in os.environ:
            train_model.compile(
                #optimizer=tf.train.RMSPropOptimizer(self.learning_rate))
                optimizer=RMSprop())
        else:
            #  from accum_optimizer import AccumOptimizer
            #  train_model.compile(optimizer=AccumOptimizer(
            #      Adam(self.learning_rate), steps_per_update))
            train_model.compile(optimizer=AdamW(self.learning_rate))
        train_model.summary()

        self.train_model = train_model
Exemplo n.º 23
0
    if args.distributed:
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model, pg)

    model = model.cuda()


    params = model.parameters()

    # param_optimizer = list(model.named_parameters())
    # no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    # optimizer_grouped_parameters = [
    #     {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.0001},
    #     {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    # ] 

    # alternatives tried: SGD(params, lr=0.01, momentum=0.9), AdamW(params, lr=5e-4),
    # SGD(params, lr=0.04, momentum=0.9, weight_decay=1e-4), AdamW(params, lr=1e-4),
    # SGD(params, lr=0.001, momentum=0.9, weight_decay=1e-7, nesterov=True),
    # AdamW(params, lr=1e-3, weight_decay=0.1), Novograd(params, lr=4e-4, weight_decay=2e-5),
    # AdamW(params, lr=1e-4, weight_decay=0.15)
    optimizer = AdamW(params, lr=2e-5)

    # model, optimizer = amp.initialize(model, optimizer, opt_level="O2")

    if args.distributed:
        model = nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
            output_device=args.local_rank, find_unused_parameters=True)

    loss_scaler = torch.cuda.amp.GradScaler()

    
    snap_to_load = 'eff6_4k_{0}_best_full2_0'.format(fold)
    if args.local_rank == 0:
        print("=> loading checkpoint '{}'".format(snap_to_load))
    checkpoint = torch.load(path.join(models_folder, snap_to_load), map_location='cpu')
    loaded_dict = checkpoint['state_dict']
Exemplo n.º 24
0
    train_data_loader = DataLoader(data_train,
                                   batch_size=batch_size,
                                   num_workers=4,
                                   shuffle=True,
                                   pin_memory=True)
    val_data_loader = DataLoader(val_train,
                                 batch_size=batch_size,
                                 num_workers=2,
                                 shuffle=False,
                                 pin_memory=True)

    model = nn.DataParallel(Dpn92_9ch_Unet()).cuda()

    params = model.parameters()

    optimizer = AdamW(params, lr=1e-4, weight_decay=1e-4)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=[4, 12, 22],
                                         gamma=0.5)
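    # segmentation loss: per-image weighted combination of Dice (1.0) and Focal (10.0)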
    loss_function = ComboLoss({
        'dice': 1.0,
        'focal': 10.0
    }, per_image=True).cuda()

    l1_loss = torch.nn.SmoothL1Loss().cuda()

    best_score = 0
    for epoch in range(25):
        train_epoch(epoch, loss_function, l1_loss, model, optimizer, scheduler,
                    train_data_loader)
        torch.save(
Exemplo n.º 25
0
def train(args):
    model = models.myecgnet()
    if args.ckpt and not args.resume:
        state = torch.load(args.ckpt, map_location='cpu')
        model.load_state_dict(state['state_dict'])
        print('train with pretrained weight val_f1', state['f1'])
    model = model.to(device)
    train_dataset = ECGDataset(data_path=config.train_data, train=True)
    train_dataloader = DataLoader(train_dataset,
                                  collate_fn=my_collate_fn,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=8)
    val_dataset = ECGDataset(data_path=config.train_data, train=False)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=config.batch_size,
                                num_workers=8)
    print("train_datasize", len(train_dataset), "val_datasize",
          len(val_dataset))
    optimizer = AdamW(model.parameters(), lr=config.lr)
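    # per-class weights for the multi-label loss criterion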
    w = torch.tensor(train_dataset.wc, dtype=torch.float).to(device)
    criterion = utils.WeightedMultilabel(w)
    model_save_dir = '%s/%s_%s' % (config.ckpt, config.model_name,
                                   time.strftime("%Y%m%d%H%M"))
    if args.ex:
        model_save_dir += args.ex
    os.mkdir(model_save_dir)
    best_f1 = -1
    lr = config.lr
    start_epoch = 1
    stage = 1
    if args.resume:
        if os.path.exists(args.ckpt):
            model_save_dir = args.ckpt
            current_w = torch.load(os.path.join(args.ckpt, config.current_w))
            best_w = torch.load(os.path.join(model_save_dir, config.best_w))
            best_f1 = best_w['loss']
            start_epoch = current_w['epoch'] + 1
            lr = current_w['lr']
            stage = current_w['stage']
            model.load_state_dict(current_w['state_dict'])
            if start_epoch - 1 in config.stage_epoch:
                stage += 1
                lr /= config.lr_decay
                utils.adjust_learning_rate(optimizer, lr)
                model.load_state_dict(best_w['state_dict'])
            print("=> loaded checkpoint (epoch {})".format(start_epoch - 1))
    for epoch in range(start_epoch, config.max_epoch + 1):
        since = time.time()
        train_loss, train_f1 = train_epoch(model,
                                           optimizer,
                                           criterion,
                                           train_dataloader,
                                           show_interval=10)
        val_loss, val_f1 = val_epoch(model, criterion, val_dataloader)
        print(
            '#epoch:%03d\tstage:%d\ttrain_loss:%.4f\ttrain_f1:%.3f\tval_loss:%0.4f\tval_f1:%.3f\ttime:%s\n'
            % (epoch, stage, train_loss, train_f1, val_loss, val_f1,
               utils.print_time_cost(since)))
        state = {
            "state_dict": model.state_dict(),
            "epoch": epoch,
            "loss": val_loss,
            'f1': val_f1,
            'lr': lr,
            'stage': stage
        }
        save_ckpt(state, best_f1 < val_f1, model_save_dir)
        best_f1 = max(best_f1, val_f1)
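        # stage-wise schedule: at each configured epoch, reload the best weights so far and decay the learning rate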
        if epoch in config.stage_epoch:
            stage += 1
            lr /= config.lr_decay
            best_w = os.path.join(model_save_dir, config.best_w)
            model.load_state_dict(torch.load(best_w)['state_dict'])
            print("*" * 10, "step into stage%02d lr %.3ef" % (stage, lr))
            utils.adjust_learning_rate(optimizer, lr)