Example #1
def pretrain_generator(netG, module, param, batch_size):
    outel = list(netG._modules.values())[-1].weight.shape[0]
    dwidth = min(1024, outel)
    netD = make_netD(dwidth, batch_size)

    print(
        f"Layer size: {outel}, G params: {param_count(netG)}, D params: {param_count(netD)}"
    )
    optimG = AdamW(netG.parameters(), lr=5e-4, weight_decay=1e-4)
    optimD = AdamW(netD.parameters(), lr=5e-5, weight_decay=1e-4)

    i = 0
    d_adv_meter = AverageMeter()

    while True:
        netG.zero_grad()
        netD.zero_grad()

        z = fast_randn((batch_size, 256), requires_grad=True, device=device)
        q = netG(z)

        free_params([netD])
        freeze_params([netG])
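        # D trainable, G frozen: critic (D) update with the WGAN-GP objective follows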

        noise = codes_with_dropout(generate_noise(module, param, batch_size),
                                   dwidth)
        codes = codes_with_dropout(q, dwidth)
        d_real = netD(noise)
        d_fake = netD(codes)

        interp = random_interpolate(noise, codes, device=device)
        gp = calc_gradient_penalty(netD, interp, device=device)
        d_adv = d_fake.mean() - d_real.mean()
        d_loss = d_adv + 10 * gp
        d_adv_meter.update(d_adv.item())
        d_loss.backward(retain_graph=True)

        optimD.step()
        freeze_params([netD])
        free_params([netG])
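        # D frozen, G trainable: generator update pushes D's score on generated codes up (loss = -D(G(z)))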

        d_fake_loss = -d_fake.mean()
        d_fake_loss.backward()

        optimG.step()
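        # Log every 50 iterations; stop pretraining once the critic's mean adversarial gap turns positive after 2000 iterations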
        if i % 50 == 0:
            print(d_adv_meter.avg, gp.item())
            if i > 2000 and d_adv_meter.avg > 0:
                break

            d_adv_meter.reset()
        i += 1
Example #2
def main(config):
    seed_all()
    os.makedirs('cache', exist_ok=True)
    os.makedirs(config.logdir, exist_ok=True)
    print("Logging to: %s" % config.logdir)
    src_files = sorted(glob('*.py'))
    for src_fn in src_files:
        dst_fn = os.path.join(config.logdir, src_fn)
        copyfile(src_fn, dst_fn)

    train_image_fns = sorted(glob(os.path.join(config.train_dir, '*.jpg')))
    test_image_fns = sorted(glob(os.path.join(config.test_dir, '*.jpg')))

    assert len(train_image_fns) == 3881
    assert len(test_image_fns) == 4150

    gt, label_to_int = load_gt(config.train_rle)
    int_to_label = {v: k for k, v in label_to_int.items()}
    # create folds
    np.random.shuffle(train_image_fns)

    if config.subset > 0:
        train_image_fns = train_image_fns[:config.subset]

    folds = np.arange(len(train_image_fns)) % config.num_folds
    val_image_fns = [
        fn for k, fn in enumerate(train_image_fns) if folds[k] == config.fold
    ]
    train_image_fns = [
        fn for k, fn in enumerate(train_image_fns) if folds[k] != config.fold
    ]

    if config.add_val:
        print("Training on validation set")
        train_image_fns = train_image_fns + val_image_fns[:]

    print(len(val_image_fns), len(train_image_fns))

    # TODO: drop empty images <- is this helpful?
    train_image_fns = [
        fn for fn in train_image_fns if KuzushijiDataset.fn_to_id(fn) in gt
    ]
    val_image_fns = [
        fn for fn in val_image_fns if KuzushijiDataset.fn_to_id(fn) in gt
    ]

    print("VAL: ", len(val_image_fns), val_image_fns[123])
    print("TRAIN: ", len(train_image_fns), train_image_fns[456])

    train_ds = KuzushijiDataset(train_image_fns,
                                gt_boxes=gt,
                                label_to_int=label_to_int,
                                augment=True)
    val_ds = KuzushijiDataset(val_image_fns,
                              gt_boxes=gt,
                              label_to_int=label_to_int)

    if config.cache:
        train_ds.cache()
        val_ds.cache()

    val_loader = data.DataLoader(val_ds,
                                 batch_size=config.batch_size // 8,
                                 shuffle=False,
                                 num_workers=config.num_workers,
                                 pin_memory=config.pin,
                                 drop_last=False)

    model = FPNSegmentation(config.slug)
    if config.weight is not None:
        print("Loading: %s" % config.weight)
        model.load_state_dict(th.load(config.weight))
    model = model.to(config.device)

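    # AdamW parameter groups: apply weight decay to weights only, not to biases or BatchNorm parameters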
    no_decay = ['mean', 'std', 'bias'] + ['.bn%d.' % i for i in range(100)]
    grouped_parameters = [{
        'params': [],
        'weight_decay': config.weight_decay
    }, {
        'params': [],
        'weight_decay': 0.0
    }]
    for n, p in model.named_parameters():
        if not any(nd in n for nd in no_decay):
            # print("Decay: %s" % n)
            grouped_parameters[0]['params'].append(p)
        else:
            # print("No Decay: %s" % n)
            grouped_parameters[1]['params'].append(p)
    optimizer = AdamW(grouped_parameters, lr=config.lr)

    if config.apex:
        model, optimizer = apex.amp.initialize(model,
                                               optimizer,
                                               opt_level="O1",
                                               verbosity=0)

    updates_per_epoch = len(train_ds) // config.batch_size
    num_updates = int(config.epochs * updates_per_epoch)
    scheduler = WarmupLinearSchedule(warmup=config.warmup, t_total=num_updates)

    # training loop
    smooth = 0.1
    best_acc = 0.0
    best_fn = None
    global_step = 0
    for epoch in range(1, config.epochs + 1):
        smooth_loss = None
        smooth_accuracy = None
        model.train()
        train_loader = data.DataLoader(train_ds,
                                       batch_size=config.batch_size,
                                       shuffle=True,
                                       num_workers=config.num_workers,
                                       pin_memory=config.pin,
                                       drop_last=True)
        progress = tqdm(total=len(train_ds), smoothing=0.01)
        for i, (X, fns, hm, centers, classes) in enumerate(train_loader):
            X = X.to(config.device).float()
            hm = hm.to(config.device)
            centers = centers.to(config.device)
            classes = classes.to(config.device)
            hm_pred, classes_pred = model(X, centers=centers)
            loss = kuzushiji_loss(hm, centers, classes, hm_pred,
                                  classes_pred)
            if config.apex:
                with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Gradient accumulation: step and zero the optimizer (and refresh
            # the warmup LR) only every accumulation_step batches.
            lr_this_step = None
            if (i + 1) % config.accumulation_step == 0:
                optimizer.step()
                optimizer.zero_grad()
                lr_this_step = config.lr * scheduler.get_lr(
                    global_step, config.warmup)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr_this_step
                global_step += 1

            smooth_loss = loss.item() if smooth_loss is None else \
                smooth * loss.item() + (1. - smooth) * smooth_loss
            accuracy = th.mean(
                ((th.sigmoid(hm_pred) >= 0.5) == (hm == 1)).to(
                    th.float)).item()
            smooth_accuracy = accuracy if smooth_accuracy is None else \
                smooth * accuracy + (1. - smooth) * smooth_accuracy
            progress.set_postfix(
                ep='%d/%d' % (epoch, config.epochs),
                loss='%.4f' % smooth_loss,
                accuracy='%.4f' % (smooth_accuracy),
                lr='%.6f' %
                (config.lr if lr_this_step is None else lr_this_step))
            progress.update(len(X))

        # skip validation
        if epoch not in [10, 20, 30, 40, 50]:
            if 1 < epoch <= 65:
                continue

        # validation loop
        model.eval()
        progress = tqdm(enumerate(val_loader), total=len(val_loader))
        hm_correct, classes_correct = 0, 0
        num_hm, num_classes = 0, 0
        with th.no_grad():
            for i, (X, fns, hm, centers, classes) in progress:
                X = X.to(config.device).float()
                hm = hm.cuda()
                centers = centers.cuda()
                classes = classes.cuda()
                hm_pred, classes_pred = model(X)
                hm_pred = th.sigmoid(hm_pred)
                classes_pred = th.nn.functional.softmax(classes_pred, 1)
                hm_cuda = hm.cuda()
                # PyTorch 1.2 has `bool`
                if hasattr(hm_cuda, 'bool'):
                    hm_cuda = hm_cuda.bool()
                hm_correct += (hm_cuda == (hm_pred >=
                                           0.5)).float().sum().item()
                num_hm += np.prod(hm.shape)
                num_samples = len(X)
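                # For each image, read the predicted class at every ground-truth center and count correct letters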
                for sample_ind in range(num_samples):
                    center_mask = centers[sample_ind, :, 0] != -1
                    per_image_letters = center_mask.sum().item()
                    if per_image_letters == 0:
                        continue
                    num_classes += per_image_letters
                    centers_per_img = centers[sample_ind][center_mask]
                    classes_per_img = classes[sample_ind][center_mask]
                    classes_per_img_pred = classes_pred[
                        sample_ind][:, centers_per_img[:, 1],
                                    centers_per_img[:, 0]].argmax(0)
                    classes_correct += (
                        classes_per_img_pred == classes_per_img).sum().item()

        val_hm_acc = hm_correct / num_hm
        val_classes_acc = classes_correct / num_classes
        summary_str = 'f%02d-ep-%04d-val_hm_acc-%.4f-val_classes_acc-%.4f' % (
            config.fold, epoch, val_hm_acc, val_classes_acc)

        progress.write(summary_str)
        if val_classes_acc >= best_acc:
            weight_fn = os.path.join(config.logdir, summary_str + '.pth')
            progress.write("New best: %s" % weight_fn)
            th.save(model.state_dict(), weight_fn)
            best_acc = val_classes_acc
            best_fn = weight_fn
            fns = sorted(
                glob(os.path.join(config.logdir, 'f%02d-*.pth' % config.fold)))
            for fn in fns[:-config.n_keep]:
                os.remove(fn)

    # create submission
    test_ds = KuzushijiDataset(test_image_fns)
    test_loader = data.DataLoader(test_ds,
                                  batch_size=config.batch_size // 8,
                                  shuffle=False,
                                  num_workers=config.num_workers,
                                  pin_memory=False,
                                  drop_last=False)
    if best_fn is not None:
        model.load_state_dict(th.load(best_fn))
    model.eval()
    sub = create_submission(model,
                            test_loader,
                            int_to_label,
                            config,
                            pred_zip=config.pred_zip)
    sub.to_csv(config.submission_fn, index=False)
    print("Wrote to: %s" % config.submission_fn)

    # create val submission
    val_fn = config.submission_fn.replace('.csv', '_VAL.csv')
    model.eval()
    sub = []
    sub = create_submission(model,
                            val_loader,
                            int_to_label,
                            config,
                            pred_zip=config.pred_zip.replace(
                                '.zip', '_VAL.zip'))
    sub.to_csv(val_fn, index=False)
    print("Wrote to: %s" % val_fn)
Example #3
def main(config):
  seed_all()
  os.makedirs('cache', exist_ok=True)
  os.makedirs(config.logdir, exist_ok=True)
  print("Logging to: %s" % config.logdir)
  src_files = sorted(glob('*.py'))
  for src_fn in src_files:
    dst_fn = os.path.join(config.logdir, src_fn)
    copyfile(src_fn, dst_fn)

  train_image_fns = sorted(glob(os.path.join(config.train_dir, '*/*/*.dcm')))
  test_image_fns = sorted(glob(os.path.join(config.test_dir, '*/*/*.dcm')))

  # assert len(train_image_fns) == 10712
  # assert len(test_image_fns) == 1377

  gt = load_gt(config.train_rle)
  # create folds
  np.random.shuffle(train_image_fns)

  if config.subset > 0:
    train_image_fns = train_image_fns[:config.subset]

  folds = np.arange(len(train_image_fns)) % config.num_folds
  val_image_fns = [fn for k, fn in enumerate(train_image_fns)
      if folds[k] == config.fold]
  train_image_fns = [fn for k, fn in enumerate(train_image_fns)
      if folds[k] != config.fold]
  # remove not-used files:
  # https://www.kaggle.com/c/siim-acr-pneumothorax-segmentation/discussion/98478#latest-572385  # noqa
  train_image_fns = [fn for fn in train_image_fns
      if DicomDataset.fn_to_id(fn) in gt]
  val_image_fns = [fn for fn in val_image_fns
      if DicomDataset.fn_to_id(fn) in gt]

  print("VAL: ", len(val_image_fns), os.path.basename(val_image_fns[0]))
  print("TRAIN: ", len(train_image_fns), os.path.basename(train_image_fns[0]))

  train_ds = DicomDataset(train_image_fns, gt_rles=gt, augment=True)
  val_ds = DicomDataset(val_image_fns, gt_rles=gt)

  if config.cache:
    train_ds.cache()
    val_ds.cache()

  val_loader = data.DataLoader(val_ds, batch_size=config.batch_size,
                               shuffle=False, num_workers=config.num_workers,
                               pin_memory=config.pin, drop_last=False)

  model = FPNSegmentation(config.slug, ema=config.ema)
  if config.weight is not None:
    print("Loading: %s" % config.weight)
    model.load_state_dict(th.load(config.weight))
  model = model.to(config.device)

  no_decay = ['mean', 'std', 'bias'] + ['.bn%d.' % i for i in range(100)]
  grouped_parameters = [{'params': [], 'weight_decay': config.weight_decay},
      {'params': [], 'weight_decay': 0.0}]
  for n, p in model.named_parameters():
    if not any(nd in n for nd in no_decay):
      print("Decay: %s" % n)
      grouped_parameters[0]['params'].append(p)
    else:
      print("No Decay: %s" % n)
      grouped_parameters[1]['params'].append(p)
  optimizer = AdamW(grouped_parameters, lr=config.lr)

  if config.apex:
    model, optimizer = apex.amp.initialize(model, optimizer, opt_level="O1",
                                           verbosity=0)

  updates_per_epoch = len(train_ds) // config.batch_size
  num_updates = int(config.epochs * updates_per_epoch)
  scheduler = WarmupLinearSchedule(warmup=config.warmup, t_total=num_updates)

  # training loop
  smooth = 0.1
  best_dice = 0.0
  best_fn = None
  global_step = 0
  for epoch in range(1, config.epochs + 1):
    smooth_loss = None
    smooth_accuracy = None
    model.train()
    train_loader = data.DataLoader(train_ds, batch_size=config.batch_size,
                                   shuffle=True, num_workers=config.num_workers,
                                   pin_memory=config.pin, drop_last=True)
    progress = tqdm(total=len(train_ds), smoothing=0.01)
    for i, (X, _, y_true) in enumerate(train_loader):
      X = X.to(config.device).float()
      y_true = y_true.to(config.device)
      y_pred = model(X)
      loss = siim_loss(y_true, y_pred, weights=None)
      if config.apex:
        with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
          scaled_loss.backward()
      else:
        loss.backward()

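      # Gradient accumulation: step the optimizer and refresh the warmup LR every accumulation_step batches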
      lr_this_step = None
      if (i + 1) % config.accumulation_step == 0:
        optimizer.step()
        optimizer.zero_grad()
        lr_this_step = config.lr * scheduler.get_lr(global_step, config.warmup)
        for param_group in optimizer.param_groups:
          param_group['lr'] = lr_this_step
        global_step += 1

      smooth_loss = loss.item() if smooth_loss is None else \
          smooth * loss.item() + (1. - smooth) * smooth_loss
      # print((y_true >= 0.5).sum().item())
      accuracy = th.mean(((y_pred >= 0.5) == (y_true == 1)).to(
          th.float)).item()
      smooth_accuracy = accuracy if smooth_accuracy is None else \
          smooth * accuracy + (1. - smooth) * smooth_accuracy
      progress.set_postfix(ep='%d/%d' % (epoch, config.epochs),
            loss='%.4f' % smooth_loss, accuracy='%.4f' %
            (smooth_accuracy), lr='%.6f' % (config.lr if lr_this_step is None
              else lr_this_step))
      progress.update(len(X))

    if epoch <= 12:
      continue
    # validation loop
    model.eval()
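    # Sweep binarization thresholds and keep the one with the best mean Dice on non-empty masks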
    thresholds = [0.1, 0.2]
    dice_coeffs = [[] for _ in range(len(thresholds))]
    progress = tqdm(enumerate(val_loader), total=len(val_loader))
    with th.no_grad():
      for i, (X, _, y_trues) in progress:
        X = X.to(config.device).float()
        y_trues = y_trues.to(config.device)
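        # Horizontal-flip test-time augmentation: average predictions on the original and mirrored input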
        y_preds = model(X)
        y_preds_flip = th.flip(model(th.flip(X, (-1, ))), (-1, ))
        y_preds = 0.5 * (y_preds + y_preds_flip)

        y_trues = y_trues.cpu().numpy()
        y_preds = y_preds.cpu().numpy()
        for yt, yp in zip(y_trues, y_preds):
          yt = (yt.squeeze() >= 0.5).astype('uint8')
          yp = yp.squeeze()
          for dind, threshold in enumerate(thresholds):
            yp_ = (yp >= threshold).astype(np.uint8)
            sc = score(yt, yp_)
            dice_coeffs[dind].append(sc)

    best_threshold_ind = -1
    dice_coeff = -1
    for dind, threshold in enumerate(thresholds):
      dc = np.mean([x[0] for x in dice_coeffs[dind] if x[1] == 'non-empty'])
      # progress.write("Dice @%.2f: %.4f" % (threshold, dc))
      if dc > dice_coeff:
        dice_coeff = dc
        best_threshold_ind = dind

    dice_coeffs = dice_coeffs[best_threshold_ind]
    num_empty = sum(1 for x in dice_coeffs if x[1] == 'empty')
    num_total = len(dice_coeffs)
    num_non_empty = num_total - num_empty
    empty_sum = np.sum([d[0] for d in dice_coeffs if d[1] == 'empty'])
    non_empty_sum = np.sum([d[0] for d in dice_coeffs if d[1] == 'non-empty'])
    dice_coeff_empty = empty_sum / num_empty
    dice_coeff_non_empty = non_empty_sum / num_non_empty
    progress.write('[Empty: %d]: %.3f | %.3f, [Non-Empty: %d]: %.3f | %.3f' % (
        num_empty, dice_coeff_empty, empty_sum / num_total,
        num_non_empty, dice_coeff_non_empty, non_empty_sum / num_total))
    dice_coeff = float(dice_coeff)
    summary_str = 'f%02d-ep-%04d-val_dice-%.4f@%.2f' % (config.fold, epoch,
        dice_coeff, thresholds[best_threshold_ind])
    progress.write(summary_str)
    if dice_coeff > best_dice:
      weight_fn = os.path.join(config.logdir, summary_str + '.pth')
      th.save(model.state_dict(), weight_fn)
      best_dice = dice_coeff
      best_fn = weight_fn
      fns = sorted(glob(os.path.join(config.logdir, 'f%02d-*.pth' %
          config.fold)))
      for fn in fns[:-config.n_keep]:
        os.remove(fn)

  # create submission
  test_ds = DicomDataset(test_image_fns)
  test_loader = data.DataLoader(test_ds, batch_size=config.batch_size,
                               shuffle=False, num_workers=0,
                               pin_memory=False, drop_last=False)
  if best_fn is not None:
    model.load_state_dict(th.load(best_fn))
  model.eval()
  sub = create_submission(model, test_loader, config, pred_zip=config.pred_zip)
  sub.to_csv(config.submission_fn, index=False)
  print("Wrote to: %s" % config.submission_fn)

  # create val submission
  val_fn = config.submission_fn.replace('.csv', '_VAL.csv')
  model.eval()
  sub = []
  sub = create_submission(model, val_loader, config,
      pred_zip=config.pred_zip.replace('.zip', '_VAL.zip'))
  sub.to_csv(val_fn, index=False)
  print("Wrote to: %s" % val_fn)
Example #4
def train(name, loader, checkpoint, num_rep, lr, beta1, gamma_gan, num_epochs,
          wd, device):
    discriminator = Discriminator().to(device)
    generator = Generator(num_rep).to(device)

    losses = {'D': [], 'G': []}

    optimizer_D = AdamW(discriminator.parameters(),
                        lr=lr,
                        weight_decay=wd,
                        betas=(beta1, 0.99))
    optimizer_G = AdamW(generator.parameters(),
                        lr=lr,
                        weight_decay=wd,
                        betas=(beta1, 0.99))

    bce = nn.BCELoss()
    mse = nn.MSELoss()
    normalizer = Normalizer(cfg.mean, cfg.std, device)

    if torch.cuda.device_count() > 1:
        generator = nn.DataParallel(generator)
        discriminator = nn.DataParallel(discriminator)

    save_path = Path('.') / 'save' / name
    if not save_path.is_dir():
        save_path.mkdir(parents=True)

    if checkpoint:
        losses = load_checkpoint(save_path, discriminator, generator,
                                 optimizer_D, optimizer_G)

    last_epoch = len(losses['D']) - 1
    logging.info('Last epoch={}'.format(last_epoch))

    for epoch in range(last_epoch + 1, num_epochs):
        losses_G = 0.0
        losses_D = 0.0
        loss_G_gan_acc = 0.0
        loss_G_M_acc = 0.0

        iter_count = 0

        for image, gt, _ in loader:
            batchsize = image.size(0)
            image, gt = image.to(device), gt.to(device)

            # Phase 1: train the discriminator
            discriminator.zero_grad()
            labels = torch.full((batchsize, 1), 1.0, device=device)  # real labels (float, as required by BCELoss)
            output = discriminator(gt)
            D_x = output.mean().item()
            loss_D_real = bce(output, labels)
            loss_D_real.backward()

            fake = generator(image)
            fake = normalizer(fake)
            labels.fill_(0)
            output = discriminator(fake.detach())
            D_G_z1 = output.mean().item()
            loss_D_fake = bce(output, labels)
            loss_D_fake.backward()

            loss_D = loss_D_real.item() + loss_D_fake.item()
            optimizer_D.step()

            # Phase 2: train the generator

            generator.zero_grad()
            output = discriminator(fake)
            D_G_z2 = output.mean().item()
            labels.fill_(1)
            loss_G_gan = bce(output, labels)
            loss_G_gan_acc += loss_G_gan.item()

            loss_G_M = mse(fake, gt)
            loss_G_M_acc += loss_G_M.item()

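            # Total generator loss: adversarial term weighted by gamma_gan plus pixel-wise MSE to the ground truth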
            loss_G = gamma_gan * loss_G_gan + loss_G_M

            loss_G.backward()
            optimizer_G.step()

            losses_D += loss_D
            losses_G += loss_G.item()
            if iter_count % 20 == 0:
                logging.info(
                    "Iteration {} loss -- Loss D {:.4f}, "
                    "Loss G {:.4f}, D(x) {:.4f} D(g(z)) {:.4f} / {:.4f}".
                    format(iter_count, loss_D, loss_G, D_x, D_G_z1, D_G_z2))

            iter_count += 1

        logging.info("D Loss: {:.4f}, G Loss: {:.4f} at epoch {}.".format(
            losses_D, losses_G, epoch))
        logging.info('loss_G_gan_acc={:.4f}, loss_G_M_acc={:.4f}'.format(
            loss_G_gan_acc, loss_G_M_acc))
        losses['D'].append(losses_D)
        losses['G'].append(losses_G)

        if checkpoint:
            save_checkpoint(save_path, discriminator, generator, optimizer_D,
                            optimizer_G, losses)
Example #5
    def objective(SCI_SGD_MOMENTUM, SCI_DROPOUT, SCI_BATCH_SIZE, SCI_L_SECOND, SCI_optimizer, LINEARITY):
        global SCI_REGULARIZATION, SCI_EPOCHS, SCI_loss_type, SCI_RELU
        global SCI_BIAS, SCI_BN_MOMENTUM, device, SCI_LR, MaxCredit, count, CreditVector, CreditVec
        
        SCI_SGD_MOMENTUM = SCI_SGD_MOMENTUM / 10
        DROPOUT = (SCI_DROPOUT / 2).item()
        if SCI_DROPOUT < 0 :
            DROPOUT = 0

        BATCH_SIZE = int(SCI_BATCH_SIZE)
        
        if SCI_L_SECOND < 4 :
            SCI_L_SECOND = 4
            
        if SCI_optimizer < 1 :
            SCI_optimizer = 1
        
        L_SECOND = int(SCI_L_SECOND)
        
        loss_func = nn.CrossEntropyLoss()

        def create_loss(LOSS):
            if LOSS == 'CrossEntropyLoss':
                loss_func = nn.CrossEntropyLoss()
            elif LOSS == 'NLLLoss':
                loss_func = nn.NLLLoss()
            else:
                loss_func = nn.MultiMarginLoss()
            return loss_func

        REGULARIZATION = float(str(SCI_REGULARIZATION))
        optimizer1 = str(SCI_optimizer)

        from cnn_model import CNN6      
        cnn = CNN6(L_FIRST, L_SECOND, KERNEL_X, SCI_BIAS, SCI_BN_MOMENTUM, SCI_RELU, DROPOUT, dataset.CLASSES, LINEARITY)     
    
        if GPU_SELECT == 2:
            if torch.cuda.device_count() > 1:
                cnn = nn.DataParallel(cnn, device_ids=[0, 1], dim=0) 
            cnn = cnn.cuda()
        if GPU_SELECT == 1:
            cnn.to(device)  
        if GPU_SELECT == 0:
            cnn.to(device)        

        cnn.apply(CNN6.weights_reset)
        cnn.share_memory()

        train_losses = []  # to track the training loss as the model trains
        output = 0
        loss = 0
        accuracy = 0
        early_stopping.counter = 0
        early_stopping.best_score = None
        early_stopping.early_stop = False
        early_stopping.verbose = False  
        TEST_RESULTS = torch.zeros(1, 2)
    
        loss_type = create_loss(SCI_loss_type)
        
        from adamw import AdamW
        
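        # Pick the optimizer from the search variable; option '3' is AdamW with decoupled weight decay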
        if optimizer1 == '1':
            optimizer = optim.Adam(cnn.parameters(), lr=SCI_LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION)
        if optimizer1 == '2':
            optimizer = optim.Adam(cnn.parameters(), lr=SCI_LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION, amsgrad=True)
        if optimizer1 == '3':
            optimizer = AdamW(cnn.parameters(), lr=SCI_LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION)            
        if optimizer1 == '4':
            optimizer = optim.SGD(cnn.parameters(), lr=SCI_LR, momentum=SCI_SGD_MOMENTUM, weight_decay=REGULARIZATION)
        if optimizer1 == '5':
            optimizer = optim.Adadelta(cnn.parameters(), lr=SCI_LR, weight_decay=REGULARIZATION)
        if optimizer1 == '6':
            optimizer = optim.Adagrad(cnn.parameters(), lr=SCI_LR, weight_decay=REGULARIZATION)
        if optimizer1 > '6':           
            optimizer = optim.Adam(cnn.parameters(), lr=SCI_LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION)
    
        from Utillities import Utillities
        Utillities.listing(optimizer, SCI_SGD_MOMENTUM, SCI_BN_MOMENTUM, L_SECOND, SCI_LR, SCI_RELU, SCI_BIAS, SCI_loss_type, REGULARIZATION, BATCH_SIZE, DROPOUT, LINEARITY)

        train_loader = Data.DataLoader(dataset=dataset.train_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, drop_last=True, pin_memory=True)
        validation_loader = Data.DataLoader(dataset=dataset.validation_dataset, batch_size=144, shuffle=False, num_workers=0, drop_last=True, pin_memory=True)    
        test_loader = Data.DataLoader(dataset=dataset.test_dataset, batch_size=599, shuffle=False, num_workers=0, pin_memory=True, drop_last=True)
    
        for epoch in range(SCI_EPOCHS):
            loss = None        
            cnn.train().cuda()
            for step, (train_data, train_target) in enumerate(train_loader):   
                train_data, train_target = train_data.to(device), train_target.to(device)
                output, temp = cnn(train_data)  # forward pass: compute predicted outputs by passing inputs to the model     
                loss = loss_func(output, train_target)
                train_losses.append(loss.item())  # record training loss 
                optimizer.zero_grad()  # clear old gradients before the backward pass
                loss.backward()  # backward pass: compute gradient of the loss with respect to model parameters
                optimizer.step()  # perform a single optimization step (parameter update)
      
            cnn.eval().cuda()  # switch to evaluation (no change) mode           
            valid_loss = 0
            accuracy = 0
            with torch.no_grad():
                for step, (validation_data, validation_target) in enumerate(validation_loader):
                    validation_data, validation_target = validation_data.to(device), validation_target.to(device)
                    output, temp = cnn(validation_data)  # forward pass: compute predicted outputs by passing inputs to the model
                    valid_loss += loss_func(output, validation_target).item()
                    ps = torch.exp(output)
                    equality = (validation_target.data == ps.max(dim=1)[1])
                    accuracy += equality.type(torch.FloatTensor).mean()      
               
            train_losses = []
            early_stopping(valid_loss, cnn)
       
            if early_stopping.early_stop:
                if os.path.exists('checkpoint.pt'):
                    print("Loaded the model with the lowest Validation Loss!")
                    cnn.load_state_dict(torch.load('checkpoint.pt', map_location="cuda:1"))  # Choose whatever GPU device number you want
                    cnn.to(device)
                break
      
        cnn.eval()
        class_correct = list(0. for i in range(1000))
        class_total = list(0. for i in range(1000))
        with torch.no_grad():
            for (test_data, test_target) in test_loader:
                test_data, test_target = test_data.to(device), test_target.to(device)
                outputs, temp = cnn(test_data)
                _, predicted = torch.max(outputs, 1)
                c = (predicted == test_target).squeeze()
                for i in range(test_target.size(0)):
                    label = test_target[i]
                    class_correct[label] += c[i].item()
                    class_total[label] += 1

        for i in range(dataset.CLASSES):
            TEST_RESULTS[0, i] = class_correct[i] / dataset.TESTED_ELEMENTS[i]
            print('Class: ', i, ' accuracy: ', TEST_RESULTS[0, i])
            print('Class: ', i, ' correct: ', class_correct[i])
        percent = (TEST_RESULTS[0, 0] + TEST_RESULTS[0, 1]) / 2
        print('Final percentage: ', percent)
    
        CreditCost = (1 - TEST_RESULTS[0, 0]) * dataset.TESTED_ELEMENTS[0] + (1 - TEST_RESULTS[0, 1]) * dataset.TESTED_ELEMENTS[1] * 5
    
        if TEST_RESULTS[0, 0] == 0 or TEST_RESULTS[0, 1] == 0 :
            CreditCost = CreditCost + 300
    
        print('Last epoch: ', epoch)
   
        if os.path.exists('checkpoint.pt'):  
            os.remove('checkpoint.pt') 

        print()
        torch.cuda.empty_cache()
        print()
        
        CreditCost = CreditCost + (SCI_SGD_MOMENTUM + SCI_DROPOUT + SCI_BATCH_SIZE + SCI_L_SECOND + SCI_optimizer) / 1000
        print('Credit Cost: ', CreditCost)
        
        if -CreditCost > MaxCredit : 
            MaxCredit = -CreditCost
        print('Best Score So Far: ', MaxCredit)   
        
        CreditVector[count] = MaxCredit    
        CreditVec[count] = count
        # plot the data
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(CreditVec, -CreditVector, color='tab:orange')
        # print(CreditVec, -CreditVector)
        count = count + 1
        # display the plot
        plt.show()
             
        return CreditCost
Example #6
    def objective(SCI_RELU, SCI_BIAS, SCI_loss_type, SCI_optimizer, SCI_BATCH_SIZE, SCI_MM, SCI_REGULARIZATION, SCI_LR, SCI_DROPOUT, SCI_L_SECOND, SCI_EPOCHS, SCI_BN_MOMENTUM, SCI_SGD_MOMENTUM, SCI_LINEARITY):
        global device, MaxCredit  
        global count, CreditVector, CreditVec
        
        SCI_BATCH_SIZE = int(SCI_BATCH_SIZE)  # integer between 4 and 256
        SCI_MM = round(SCI_MM, 3)  # real with three decimals between (0.001, 0.999)
        SCI_REGULARIZATION = round(SCI_REGULARIZATION, 3)  # real with three decimals between (0.001, 0.7)
        SCI_LR = round(SCI_LR, 5)  # real with five decimals between(1e-4, 7e-1)            
        SCI_DROPOUT = round(SCI_DROPOUT, 2)  # real with two decimals between (0, 0.4)
        SCI_L_SECOND = int(SCI_L_SECOND)  # integer between 2 and 64
        SCI_EPOCHS = int(SCI_EPOCHS)  # integer between (100, 500)
        SCI_BN_MOMENTUM = round(SCI_BN_MOMENTUM, 2)  # real with two decimals between (0, 0.99)
        SCI_SGD_MOMENTUM = round(SCI_SGD_MOMENTUM, 2)  # real with two decimals between (0, 0.99) 
        SCI_optimizer = int(SCI_optimizer)  # integer between 1 and 4
        SCI_loss_type = int(SCI_loss_type)  # integer between 1 and 3 ('CrossEntropyLoss', 'MultiMarginLoss','NLLLoss')
        SCI_LINEARITY = int(SCI_LINEARITY)
        if int(SCI_RELU) == 1 :  # integer between 1 and 2 ('True', 'False')
            SCI_RELU = True      
        else:
            SCI_RELU = False      
        if int(SCI_BIAS) == 1 :  # integer between 1 and 2 ('True', 'False')
            SCI_BIAS = True      
        else:
            SCI_BIAS = False  
               
        from cnn_model import CNN6
        cnn = CNN6(L_FIRST, SCI_L_SECOND, KERNEL_X, SCI_BIAS, SCI_BN_MOMENTUM, SCI_RELU, SCI_DROPOUT, dataset.CLASSES, SCI_LINEARITY)     
    
        if GPU_SELECT == 2:
            if torch.cuda.device_count() > 1:
                cnn = nn.DataParallel(cnn, device_ids=[0, 1], dim=0) 
            cnn = cnn.cuda()                
        if GPU_SELECT == 1:
            cnn.to(device)  
        if GPU_SELECT == 0:
            cnn.to(device)        

        # next(cnn.parameters()).is_cuda
        # print(cnn)  # net architecture   
        # list(cnn.parameters()) 
        cnn.apply(CNN6.weights_reset)        
        cnn.share_memory()
     
        loss_func = nn.CrossEntropyLoss()

        def create_loss(LOSS):
            if LOSS == 1:
                loss_func = nn.CrossEntropyLoss()
            elif LOSS == 2:
                loss_func = nn.NLLLoss()
            else:
                loss_func = nn.MultiMarginLoss()
            return loss_func

        MM = float(str(SCI_MM))
        REGULARIZATION = float(str(SCI_REGULARIZATION))
        # optimizer = str(SCI_optimizer)
        LR = float(str(SCI_LR))
        train_losses = []  # to track the training loss as the model trains
        output = 0
        loss = 0
        accuracy = 0
        early_stopping.counter = 0
        early_stopping.best_score = None
        early_stopping.early_stop = False
        early_stopping.verbose = False  
        TEST_RESULTS = torch.zeros(1, 2)
    
        loss_type = create_loss(SCI_loss_type)
    
        from adamw import AdamW
        
        if SCI_optimizer == 1:
            optimizer = optim.Adam(cnn.parameters(), lr=LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION)
        if SCI_optimizer == 2:
            optimizer = optim.Adam(cnn.parameters(), lr=LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION, amsgrad=True)
        if SCI_optimizer == 3:
            optimizer = AdamW(cnn.parameters(), lr=LR, betas=(0.9, 0.99), weight_decay=REGULARIZATION)           
        if SCI_optimizer == 4:
            optimizer = optim.SGD(cnn.parameters(), lr=LR, momentum=SCI_SGD_MOMENTUM, weight_decay=REGULARIZATION)
        if SCI_optimizer == 5:
            optimizer = optim.Adadelta(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION)
        if SCI_optimizer == 6:
            optimizer = optim.Adagrad(cnn.parameters(), lr=LR, weight_decay=REGULARIZATION)
    
        from Utillities import Utillities
        Utillities.listing(optimizer, SCI_SGD_MOMENTUM, SCI_BN_MOMENTUM, SCI_L_SECOND, SCI_LR, SCI_RELU, SCI_BIAS, SCI_loss_type, REGULARIZATION, SCI_BATCH_SIZE, SCI_DROPOUT, SCI_LINEARITY)
    
        # Data Loader for easy mini-batch return in training
        SCI_BATCH_SIZE = int(SCI_BATCH_SIZE)
        train_loader = Data.DataLoader(dataset=dataset.train_dataset, batch_size=SCI_BATCH_SIZE, shuffle=True, num_workers=0, drop_last=True, pin_memory=True)
        validation_loader = Data.DataLoader(dataset=dataset.validation_dataset, batch_size=144, shuffle=True, num_workers=0, drop_last=True, pin_memory=True)    
        test_loader = Data.DataLoader(dataset=dataset.test_dataset, batch_size=599, shuffle=True, num_workers=0, drop_last=True, pin_memory=True)
    
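        # Training loop with early stopping on the validation loss; the early_stopping helper presumably saves the best weights to checkpoint.pt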
        for epoch in range(SCI_EPOCHS):
            loss = None        
            cnn.train().cuda()
            for step, (train_data, train_target) in enumerate(train_loader):   
                train_data, train_target = train_data.to(device), train_target.to(device)
                output, temp = cnn(train_data)  # forward pass: compute predicted outputs by passing inputs to the model     
                loss = loss_func(output, train_target)
                train_losses.append(loss.item())  # record training loss 
                optimizer.zero_grad()  # clear old gradients before the backward pass
                loss.backward()  # backward pass: compute gradient of the loss with respect to model parameters
                optimizer.step()  # perform a single optimization step (parameter update)
      
            cnn.eval().cuda()  # switch to evaluation (no change) mode           
            valid_loss = 0
            accuracy = 0
            running_loss = 0.0
            with torch.no_grad():
                for step, (validation_data, validation_target) in enumerate(validation_loader):
                    validation_data, validation_target = validation_data.to(device), validation_target.to(device)
                    output, temp = cnn(validation_data)  # forward pass: compute predicted outputs by passing inputs to the model
                    valid_loss += loss_func(output, validation_target).item()
                    # ps = torch.exp(output)
                    # equality = (validation_target[0].data == ps.max(dim=1)[1])
                    # accuracy += equality.type(torch.FloatTensor).mean()    
                    # print('valid_loss: ', valid_loss)
                    
                    # print statistics
                running_loss += valid_loss
                if epoch % 100 == 0: 
                    print('average loss: %.6f' % (running_loss))
                    running_loss = 0.0
                   
            train_losses = []
            early_stopping(valid_loss, cnn)
        
            if early_stopping.early_stop:
                if os.path.exists('checkpoint.pt'):
                    # cnn = TheModelClass(*args, **kwargs)
                    print("Loaded the model with the lowest Validation Loss!")
                    cnn.load_state_dict(torch.load('checkpoint.pt'))  # Choose whatever GPU device number you want
                    cnn.to(device)
                break
      
        cnn.eval()
        class_correct = list(0. for i in range(1000))
        class_total = list(0. for i in range(1000))
        with torch.no_grad():
            for (test_data, test_target) in test_loader:
                test_data, test_target = test_data.to(device), test_target.to(device)
                outputs, temp = cnn(test_data)
                _, predicted = torch.max(outputs, 1)
                c = (predicted == test_target).squeeze()
                for i in range(test_target.size(0)):
                    label = test_target[i]
                    class_correct[label] += c[i].item()
                    class_total[label] += 1

        for i in range(dataset.CLASSES):
            TEST_RESULTS[0, i] = class_correct[i] / dataset.TESTED_ELEMENTS[i]
            print('Class: ', i, ' accuracy: ', TEST_RESULTS[0, i])   
            print('Class: ', i, ' correct: ', class_correct[i], ' of ', dataset.TESTED_ELEMENTS[i])
        percent = (TEST_RESULTS[0, 0] + TEST_RESULTS[0, 1]) / 2
        print('Final percentage: ', percent)
    
        CreditCost = int((1 - TEST_RESULTS[0, 0]) * dataset.TESTED_ELEMENTS[0] + (1 - TEST_RESULTS[0, 1]) * dataset.TESTED_ELEMENTS[1] * 5)
        
        if TEST_RESULTS[0, 0] == 0 or TEST_RESULTS[0, 1] == 0 :
            CreditCost = CreditCost + 300
    
        print('Last epoch: ', epoch)
        print('Credit Cost: ', -CreditCost)
        # list(cnn.parameters())
    
        if os.path.exists('checkpoint.pt'):  
            os.remove('checkpoint.pt') 

        print()
        
        print()
        
        if -CreditCost > MaxCredit : 
            MaxCredit = -CreditCost
        print('Best Score So Far: ', MaxCredit)    
        
        CreditVector[count] = MaxCredit    
        CreditVec[count] = count
        # plot the data
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(CreditVec, -CreditVector, color='tab:blue')
        # print(CreditVec, -CreditVector)
        count = count + 1
        # display the plot
        plt.show()
        
        return -CreditCost
Example #7
    # Iterate over the training set
    for i, sample in enumerate(progress_bar):
        if cuda:
            sample = move_to_cuda(sample)
        if len(sample) == 0:
            continue

        signals = sample['signals']
        # signals = sample['signals'].unsqueeze(-1)
        output = model(signals)
        loss = cost(output, sample['target'])

        optimizer.zero_grad()
        loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), clip_norm)
        optimizer.step()

        # Update statistics for progress bar
        total_loss = loss.item()
        stats['loss'] += total_loss
        stats['grad_norm'] += grad_norm
        stats['clip'] += 1 if grad_norm > clip_norm else 0
        progress_bar.set_postfix({key: '{:.4g}'.format(value / (i + 1)) for key, value in stats.items()},
                                 refresh=True)
    print('Epoch {:03d}: {}'.format(epoch, ' | '.join(
        key + ' {:.4g}'.format(value / len(progress_bar)) for key, value in stats.items())))

    # Validation
    model.eval()

    dev_loader = torch.utils.data.DataLoader(dev_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, drop_last=True)
Example #8
def train_gan(zq=256,
              ze=512,
              batch_size=32,
              outdir=".",
              name="tmp",
              dry=False,
              **kwargs):
    if not dry:
        tensorboard_path = Path(outdir) / 'tensorboard' / name
        model_path = Path(outdir) / 'models' / name
        tensorboard_path.mkdir(exist_ok=True, parents=True)
        model_path.mkdir(exist_ok=True, parents=True)

        sw = SummaryWriter(str(tensorboard_path))

    netT = resnet20().to(device)
    # netT = SimpleConvNet(bias=False).to(device)
    netH = HyperNet(netT, ze, zq).to(device)

    print("Loading pretrained generators...")
    pretrain = torch.load('pretrained.pt')
    netH.load_state_dict(pretrain['netH'])
    netD = SimpleLinearNet(
        [zq * batch_size, zq * batch_size // 2, zq * batch_size // 4, 1024, 1],
        final_sigmoid=True,
        batchnorm=False).to(device)

    print(netT, netH, netD)
    print(f"netT params: {param_count(netT)}")
    print(f"netH params: {param_count(netH)}")
    print(f"netD params: {param_count(netD)}")
    generator_count = param_layer_count(netT)

    optimH = AdamW(netH.parameters(),
                   lr=1e-4,
                   betas=(0.5, 0.9),
                   weight_decay=1e-4)
    optimD = AdamW(netD.parameters(),
                   lr=5e-5,
                   betas=(0.5, 0.9),
                   weight_decay=1e-4)

    g_loss_meter, d_loss_meter = AverageMeter(), AverageMeter()
    d_acc_meter = AverageMeter()
    gp_meter = AverageMeter()
    dgrad_meter = AverageMeter()

    adversarial_loss = nn.BCELoss()
    real_label, fake_label = 0, 1
    label = torch.zeros((generator_count, 1), device=device)

    ops = 0
    start_time = time.time()
    minibatch_count = 1562
    for epoch in range(100000):
        d_loss_meter.reset()
        g_loss_meter.reset()
        d_acc_meter.reset()
        gp_meter.reset()
        dgrad_meter.reset()
        # schedH.step()
        # schedD.step()
        for batch_idx in range(minibatch_count):
            n_iter = epoch * minibatch_count + batch_idx

            netH.zero_grad()
            netD.zero_grad()
            z = fast_randn((batch_size, ze), device=device, requires_grad=True)
            q = netH.encoder(z).view(-1, generator_count, zq)

            # Z Adversary
            free_params([netD])
            freeze_params([netH])

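            # Critic update: real Gaussian noise vs. hypernetwork codes, one row per generated layer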
            codes = q.permute((1, 0, 2)).contiguous().view(generator_count, -1)
            noise = fast_randn((generator_count, zq * batch_size),
                               device=device,
                               requires_grad=True)
            d_real = netD(noise)
            d_fake = netD(codes)
            d_real_loss = adversarial_loss(d_real, label.fill_(real_label))
            d_real_loss.backward(retain_graph=True)
            d_fake_loss = adversarial_loss(d_fake, label.fill_(fake_label))
            d_fake_loss.backward(retain_graph=True)
            d_loss = d_real_loss + d_fake_loss
            # gp = calc_gradient_penalty(netD, noise, codes, device=device)
            # d_loss = d_fake.mean() - d_real.mean() + 10 * gp
            # d_loss.backward(retain_graph=True)
            dgrad_meter.update(model_grad_norm(netD))
            d_loss_meter.update(d_loss.item())
            d_acc_meter.update((sum(d_real < 0.5) + sum(d_fake > 0.5)).item() /
                               (generator_count * 2))
            # gp_meter.update(gp.item())

            optimD.step()
            # schedD.batch_step()
            # Train the generator
            freeze_params([netD])
            free_params([netH])

            # fool the discriminator
            # d_fake_loss = -d_fake.mean()
            # d_fake_loss.backward()

            d_fake_loss = adversarial_loss(d_fake, label.fill_(real_label))
            d_fake_loss.backward(retain_graph=True)

            optimH.step()

            with torch.no_grad():
                """ Update Statistics """
                if batch_idx % 50 == 0:
                    current_time = time.time()
                    ops_per_sec = ops // (current_time - start_time)
                    start_time = current_time
                    ops = 0
                    print("*" * 70 + " " + name)
                    print("{}/{} D Loss: {}".format(epoch, batch_idx,
                                                    d_loss.item()))
                    print("{} ops/s".format(ops_per_sec))

                ops += batch_size

                if batch_idx > 1 and batch_idx % 199 == 0:
                    if not dry:
                        sw.add_scalar('G/loss', g_loss_meter.avg, n_iter)
                        sw.add_scalar('D/loss', d_loss_meter.avg, n_iter)
                        sw.add_scalar('D/acc', d_acc_meter.avg, n_iter)
                        sw.add_scalar('D/gp', gp_meter.avg, n_iter)
                        sw.add_scalar('D/gradnorm', dgrad_meter.avg, n_iter)
                        netH.eval()
                        netH_samples = [
                            netH(fast_randn((batch_size, ze)).cuda())
                            for _ in range(10)
                        ]
                        netH.train()
                        sw.add_scalar(
                            'G/g_var',
                            sum(
                                x.std(0).mean() for v in netH_samples
                                for x in v[1].values()) /
                            (generator_count * 10), n_iter)
                        sw.add_scalar(
                            'G/q_var',
                            torch.cat([
                                s[0].view(-1, zq) for s in netH_samples
                            ]).var(0).mean(), n_iter)

                        if kwargs['embeddings']:
                            sw.add_embedding(
                                q.view(-1, zq),
                                global_step=n_iter,
                                tag="q",
                                metadata=list(range(generator_count)) *
                                batch_size)

                        torch.save(
                            {
                                'netH': netH.state_dict(),
                                'netD': netD.state_dict()
                            }, str(model_path / 'pretrain.pt'))