import pandas as pd
import time
import os
from config import ModelConfig, FG_FRACTION, RPN_FG_FRACTION, IM_SCALE, BOX_SCALE
from torch.nn import functional as F
from lib.fpn.box_utils import bbox_loss
import torch.backends.cudnn as cudnn
from pycocotools.cocoeval import COCOeval
from lib.pytorch_misc import optimistic_restore, clip_grad_norm
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Enable the cuDNN autotuner so it can benchmark and pick convolution
# algorithms for the input shapes it encounters.
cudnn.benchmark = True
# Run-wide configuration object; the attributes read below (coco, val_size,
# batch_size, num_workers, num_gpus, use_proposals) come from it.
# NOTE(review): presumably populated from command-line arguments -- confirm
# against config.ModelConfig.
conf = ModelConfig()

if conf.coco:
    train, val = CocoDetection.splits()
    val.ids = val.ids[:conf.val_size]
    train.ids = train.ids
    train_loader, val_loader = CocoDataLoader.splits(
        train,
        val,
        batch_size=conf.batch_size,
        num_workers=conf.num_workers,
        num_gpus=conf.num_gpus)
else:
    train, val, _ = VG.splits(num_val_im=conf.val_size,
                              filter_non_overlap=False,
                              filter_empty_rels=False,
                              use_proposals=conf.use_proposals)
    train_loader, val_loader = VGDataLoader.splits(
        train,
示例#2
0
def _build_loaders(args):
    """Create the datasets and loaders selected by ``args.coco``.

    Returns:
        A ``(train, train_loader, val_loader)`` tuple. ``train`` is returned
        because the caller reads ``train.ind_to_classes`` from it.
    """
    if args.coco:
        train, val = CocoDetection.splits()
        # Cap the validation split at the requested number of images.
        val.ids = val.ids[:args.val_size]
        loader_cls = CocoDataLoader
    else:
        train, val, _ = VG.splits(num_val_im=args.val_size, filter_non_overlap=False,
                                  filter_empty_rels=False, use_proposals=args.use_proposals)
        loader_cls = VGDataLoader
    train_loader, val_loader = loader_cls.splits(train, val, batch_size=args.batch_size,
                                                 num_workers=args.num_workers,
                                                 num_gpus=args.num_gpus)
    return train, train_loader, val_loader


def _record_losses(summary_writer, history, tag, loss_items, t):
    """Print one group of losses and mirror it to the history and the writer.

    Args:
        summary_writer: writer receiving one scalar per loss, named
            ``"<tag>_<name>"``.
        history: mapping from loss name to a list; each value is appended.
        tag: group label used in the printed line and the scalar names.
        loss_items: iterable of ``(name, value)`` pairs.
        t: iteration counter used as the scalar step.
    """
    formatted = []
    for name, value in loss_items:
        formatted.append('[%s]: %.4f' % (name, value))
        history[name].append(value)
        summary_writer.add_scalar("%s_%s" % (tag, name), value, t)
    print("%s: %s" % (tag, ", ".join(formatted)))


def _evaluate(args, all_in_one_model, train_loader, val_loader, checkpoint, summary_writer, t):
    """Run ``check_model`` on both loaders and record samples and val losses."""
    print('checking on train')
    # Only the samples are kept for the training split; its losses are unused.
    _, train_samples = check_model(args, train_loader, all_in_one_model)
    checkpoint['train_samples'].append(train_samples)
    checkpoint['checkpoint_ts'].append(t)
    for name, images in train_samples.items():
        summary_writer.add_image("train_%s" % name, images, t)

    print('checking on val')
    val_losses, val_samples = check_model(args, val_loader, all_in_one_model)
    checkpoint['val_samples'].append(val_samples)
    for name, images in val_samples.items():
        summary_writer.add_image("val_%s" % name, images, t)
    for name, value in val_losses.items():
        checkpoint['val_losses'][name].append(value)
        summary_writer.add_scalar("val_%s" % name, value, t)


def _save_checkpoints(args, all_in_one_model, checkpoint, t, epoch):
    """Snapshot model/optimizer state into ``checkpoint`` and write it to disk.

    Two files are written under ``args.output_dir``:
    ``<checkpoint_name>_with_model.pt`` (everything) and
    ``<checkpoint_name>_no_model.pt`` (same dict minus model/optimizer state).
    """
    checkpoint['model_state'] = all_in_one_model.model.state_dict()

    if all_in_one_model.obj_discriminator is not None:
        checkpoint['d_obj_state'] = all_in_one_model.obj_discriminator.state_dict()
        checkpoint['d_obj_optim_state'] = all_in_one_model.optimizer_d_obj.state_dict()

    if all_in_one_model.img_discriminator is not None:
        checkpoint['d_img_state'] = all_in_one_model.img_discriminator.state_dict()
        checkpoint['d_img_optim_state'] = all_in_one_model.optimizer_d_img.state_dict()

    checkpoint['optim_state'] = all_in_one_model.optimizer.state_dict()
    checkpoint['counters']['t'] = t
    checkpoint['counters']['epoch'] = epoch

    full_path = os.path.join(args.output_dir, '%s_with_model.pt' % args.checkpoint_name)
    print('Saving checkpoint to ', full_path)
    torch.save(checkpoint, full_path)

    # Second, lightweight copy: history and samples only, no weights.
    heavy_keys = {'model_state', 'optim_state', 'model_best_state',
                  'd_obj_state', 'd_obj_optim_state', 'd_obj_best_state',
                  'd_img_state', 'd_img_optim_state', 'd_img_best_state'}
    small_checkpoint = {k: v for k, v in checkpoint.items() if k not in heavy_keys}
    small_path = os.path.join(args.output_dir, '%s_no_model.pt' % args.checkpoint_name)
    torch.save(small_checkpoint, small_path)


def _train(args, summary_writer):
    """Build the data and model, then run the adversarial training loop."""
    train, train_loader, val_loader = _build_loaders(args)
    print(train.ind_to_classes)

    all_in_one_model = neural_motifs_sg2im_model(args, train.ind_to_classes)
    # Freeze the detector
    for _, param in all_in_one_model.detector.named_parameters():
        param.requires_grad = False
    all_in_one_model.cuda()
    gan_g_loss, gan_d_loss = get_gan_losses(args.gan_loss_type)

    # Counters and the history dict are owned by the model object, so the
    # loop continues from whatever values it was constructed with.
    t, epoch, checkpoint = all_in_one_model.t, all_in_one_model.epoch, all_in_one_model.checkpoint
    while t < args.num_iterations:
        epoch += 1
        print('Starting epoch %d' % epoch)

        for batch in tqdm(train_loader, desc='Training Epoch %d' % epoch, total=len(train_loader)):
            # Honour the iteration budget inside the epoch too, instead of
            # only noticing it once the whole epoch has been consumed.
            if t >= args.num_iterations:
                break

            if t == args.eval_mode_after:
                print('switching to eval mode')
                all_in_one_model.model.eval()
                # NOTE(review): this rebuilds the optimizer over *all* parameters of
                # the wrapper; if the discriminators are submodules they would be
                # included too -- confirm this is intended.
                all_in_one_model.optimizer = optim.Adam(all_in_one_model.parameters(), lr=args.learning_rate)
            t += 1

            with timeit('forward', args.timing):
                # The wrapper is indexed with the batch rather than called;
                # presumably its __getitem__ runs the forward pass -- confirm.
                result = all_in_one_model[batch]
                imgs, imgs_pred, objs = result.imgs, result.imgs_pred, result.objs
                g_scores_fake_crop = result.g_scores_fake_crop
                g_obj_scores_fake_crop = result.g_obj_scores_fake_crop
                g_scores_fake_img = result.g_scores_fake_img
                d_scores_fake_crop = result.d_scores_fake_crop
                d_obj_scores_fake_crop = result.d_obj_scores_fake_crop
                d_scores_real_crop = result.d_scores_real_crop
                d_obj_scores_real_crop = result.d_obj_scores_real_crop
                d_scores_fake_img = result.d_scores_fake_img
                d_scores_real_img = result.d_scores_real_img

            with timeit('loss', args.timing):
                total_loss, losses = calculate_model_losses(
                    args, imgs, imgs_pred)

                if all_in_one_model.obj_discriminator is not None:
                    total_loss = add_loss(total_loss, F.cross_entropy(g_obj_scores_fake_crop, objs), losses, 'ac_loss',
                                          args.ac_loss_weight)
                    weight = args.discriminator_loss_weight * args.d_obj_weight
                    total_loss = add_loss(total_loss, gan_g_loss(g_scores_fake_crop), losses,
                                          'g_gan_obj_loss', weight)

                if all_in_one_model.img_discriminator is not None:
                    weight = args.discriminator_loss_weight * args.d_img_weight
                    total_loss = add_loss(total_loss, gan_g_loss(g_scores_fake_img), losses,
                                          'g_gan_img_loss', weight)

            losses['total_loss'] = total_loss.item()
            if not math.isfinite(losses['total_loss']):
                # Skip the whole step (generator and discriminators) on NaN/inf.
                print('WARNING: Got loss = NaN, not backpropping')
                continue

            with timeit('backward', args.timing):
                all_in_one_model.optimizer.zero_grad()
                total_loss.backward()
                all_in_one_model.optimizer.step()

            if all_in_one_model.obj_discriminator is not None:
                with timeit('d_obj loss', args.timing):
                    d_obj_losses = LossManager()
                    d_obj_gan_loss = gan_d_loss(d_scores_real_crop, d_scores_fake_crop)
                    d_obj_losses.add_loss(d_obj_gan_loss, 'd_obj_gan_loss')
                    d_obj_losses.add_loss(F.cross_entropy(d_obj_scores_real_crop, objs), 'd_ac_loss_real')
                    d_obj_losses.add_loss(F.cross_entropy(d_obj_scores_fake_crop, objs), 'd_ac_loss_fake')

                with timeit('d_obj backward', args.timing):
                    all_in_one_model.optimizer_d_obj.zero_grad()
                    d_obj_losses.total_loss.backward()
                    all_in_one_model.optimizer_d_obj.step()

            if all_in_one_model.img_discriminator is not None:
                with timeit('d_img loss', args.timing):
                    d_img_losses = LossManager()
                    d_img_gan_loss = gan_d_loss(d_scores_real_img, d_scores_fake_img)
                    d_img_losses.add_loss(d_img_gan_loss, 'd_img_gan_loss')

                with timeit('d_img backward', args.timing):
                    all_in_one_model.optimizer_d_img.zero_grad()
                    d_img_losses.total_loss.backward()
                    all_in_one_model.optimizer_d_img.step()

            if t % args.print_every == 0:
                print('t = %d / %d' % (t, args.num_iterations))
                _record_losses(summary_writer, checkpoint['losses'], "G", losses.items(), t)
                checkpoint['losses_ts'].append(t)

                if all_in_one_model.obj_discriminator is not None:
                    _record_losses(summary_writer, checkpoint['d_losses'], "D_obj",
                                   d_obj_losses.items(), t)

                if all_in_one_model.img_discriminator is not None:
                    _record_losses(summary_writer, checkpoint['d_losses'], "D_img",
                                   d_img_losses.items(), t)

            if t % args.checkpoint_every == 0:
                _evaluate(args, all_in_one_model, train_loader, val_loader,
                          checkpoint, summary_writer, t)
                _save_checkpoints(args, all_in_one_model, checkpoint, t, epoch)


def main(args):
    """Train the combined detector + image-generation model.

    Validates ``args``, ensures ``args.output_dir`` exists, opens a summary
    writer there, and runs training until ``args.num_iterations`` steps have
    been taken. Losses are logged every ``args.print_every`` steps; evaluation
    and checkpointing happen every ``args.checkpoint_every`` steps.

    Args:
        args: parsed run options. Attributes read here and in the helpers
            include ``output_dir``, ``coco``, ``val_size``, ``batch_size``,
            ``num_workers``, ``num_gpus``, ``use_proposals``,
            ``gan_loss_type``, ``num_iterations``, ``eval_mode_after``,
            ``learning_rate``, ``timing``, the loss weights,
            ``print_every``, ``checkpoint_every`` and ``checkpoint_name``.
    """
    print(args)
    check_args(args)
    # exist_ok avoids the check-then-create race of a separate existence test.
    os.makedirs(args.output_dir, exist_ok=True)
    summary_writer = SummaryWriter(args.output_dir)
    try:
        _train(args, summary_writer)
    finally:
        # Flush pending events even when training aborts with an exception.
        summary_writer.close()