Exemplo n.º 1
0
def main():
    """Train the inpainting model (NSML-aware entry point).

    Parses CLI args, builds the model/optimizer on GPU when available,
    registers NSML save/load hooks, then (in 'train' mode) runs the
    epoch/step training loop with periodic local evaluation and image
    snapshots.

    NOTE(review): several locals (model, optim, epoch, step, ...) are
    handed to NSML via ``scope=locals()``, so local variable names here
    are part of the runtime contract — do not rename casually.
    """
    args = get_args()

    # Prefer the first CUDA device; fall back to CPU.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = Inpaint()
    model = model.to(device)
    optim = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(args.beta1, args.beta2))
    # bind_nsml registers platform checkpoint hooks and returns local
    # save/load fallbacks used when running outside NSML.
    save, load = bind_nsml(model, optim)
    if args.pause == 1:
        # NSML resubmission hook: hand control back to the platform.
        nsml.paused(scope=locals())

    if args.mode == 'train':
        path_train = os.path.join(dir_data_root, 'train')
        path_train_data = os.path.join(dir_data_root, 'train', 'train_data')
        tr_loader, val_loader = data_loader_with_split(path_train, batch_size=args.batch_size)

        # Rolling metrics shown on the progress bar / reported to NSML.
        postfix = dict()
        total_step = 0
        # Progress bars are disabled on NSML (non-interactive logs).
        for epoch in trange(args.num_epochs, disable=use_nsml):
            pbar = tqdm(enumerate(tr_loader), total=len(tr_loader), disable=use_nsml)
            for step, (_, x_input, mask, x_GT) in pbar:
                total_step += 1
                x_GT = x_GT.to(device)
                x_input = x_input.to(device)
                mask = mask.to(device)
                # Model input: masked image concatenated with its mask
                # along the channel dimension (dim=1, NCHW assumed —
                # TODO confirm against the data loader).
                x_mask = torch.cat([x_input, mask], dim=1)

                model.zero_grad()
                x_hat = model(x_mask)
                # Keep known pixels from the input, take predicted pixels
                # for the holes (see compose()).
                x_composed = compose(x_input, x_hat, mask)
                loss = l1_loss(x_composed, x_GT)
                loss.backward()
                optim.step()
                postfix['loss'] = loss.item()

                if use_nsml:
                    postfix['epoch'] = epoch
                    postfix['step_'] = step
                    postfix['total_step'] = total_step
                    postfix['steps_per_epoch'] = len(tr_loader)

                # Periodic image dump + validation (also fires at step 0).
                if step % args.eval_every == 0:
                    vutils.save_image(x_GT, 'x_GT.png', normalize=True)
                    vutils.save_image(x_input, 'x_input.png', normalize=True)
                    vutils.save_image(x_hat, 'x_hat.png', normalize=True)
                    vutils.save_image(mask, 'mask.png', normalize=True)
                    metric_eval = local_eval(model, val_loader, path_train_data)
                    postfix['metric_eval'] = metric_eval
                if use_nsml:
                    if step % args.print_every == 0:
                        print(postfix)
                    nsml.report(**postfix, scope=locals(), step=total_step)
                else:
                    pbar.set_postfix(postfix)
            # Checkpoint once per epoch: NSML-managed or local fallback.
            if use_nsml:
                nsml.save(epoch)
            else:
                save(epoch)
Exemplo n.º 2
0
def main():
    """Train a super-resolution network (NSML-aware entry point).

    Parses CLI options, dynamically imports the architecture module named
    by ``--network_archi``, builds model/criterion/optimizer on GPU,
    registers NSML hooks, then runs the epoch loop: train (for edsr-*
    architectures), checkpoint, and log scalar summaries.
    """
    global opt, model
    opt = parser.parse_args()
    cudnn.benchmark = True

    log = Logger()

    # Building model: the architecture module must expose Net and criterion.
    module_net = import_module('model.' + opt.network_archi)
    model = getattr(module_net, 'Net')()
    criterion = getattr(module_net, 'criterion')()
    model = model.cuda()
    criterion = criterion.cuda()

    # Setting Optimizer: only parameters that require grad (frozen layers
    # are skipped).
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=opt.lr)

    # *** Reserved for nsml ***
    bind_nsml(model, optimizer)
    if opt.pause:
        nsml.paused(scope=locals())
    # *** Reserved for nsml *** (end)

    if opt.mode == "train":
        if IS_ON_NSML:
            opt.dataset_path = os.path.join(DATASET_PATH, 'train',
                                            'train_data')
        else:
            opt.dataset_path = '/home/data/nipa_faces_sr_tmp2/train/train_data'  # local datapath
        training_data_loader, val_loader = data_loader_with_split(
            opt.dataset_path, train_split=0.9, batch_size=opt.batchSize)

        # Training
        for epoch in range(opt.nEpochs):
            # BUG FIX: info was previously assigned only inside the edsr
            # branch, so any other architecture raised NameError at the
            # logging loop below. Default to "nothing to log".
            info = {}
            if opt.network_archi.startswith("edsr"):
                average_epoch_loss_train = train(training_data_loader,
                                                 val_loader, optimizer, model,
                                                 criterion, epoch)
                info = {'train_loss': average_epoch_loss_train}

            # Checkpoint every epoch (1-based names), then push scalars.
            nsml.save(str(epoch + 1))
            for tag, value in info.items():
                log.scalar_summary(tag, value, epoch)
Exemplo n.º 3
0
    # NOTE(review): fragment — the enclosing function header (and the
    # definitions of model, loss_fn, config, base_lr, train_split, ...)
    # is outside this view.
    if cuda:
        model = model.cuda()
        loss_fn = loss_fn.cuda()

    # Optimize only trainable parameters; small L2 regularization.
    optimizer = Adam(
        [param for param in model.parameters() if param.requires_grad],
        lr=base_lr, weight_decay=1e-4)
    # Decay the learning rate by 10x every 40 scheduler steps.
    scheduler = StepLR(optimizer, step_size=40, gamma=0.1)

    # NSML checkpoint hooks; paused() hands control to the platform on
    # resubmission.
    bind_nsml(model, optimizer, scheduler)
    if config.pause:
        nsml.paused(scope=locals())

    if mode == 'train':
        tr_loader, val_loader, val_label_file = data_loader_with_split(root=TRAIN_DATASET_PATH, train_split=train_split)
        time_ = datetime.datetime.now()
        num_batches = len(tr_loader)

        # Baseline validation before any training.
        local_eval(model, val_loader, val_label_file)

        for epoch in range(num_epochs):
            # NOTE(review): scheduler.step() before the optimizer steps is
            # the legacy (pre-1.1) PyTorch ordering — confirm intended.
            scheduler.step()
            model.train()
            for iter_, data in enumerate(tr_loader):
                x, label = data

                if cuda:
                    x = x.cuda()
                    label = label.cuda()
                pred = model(x)
Exemplo n.º 4
0
    # Dead code kept from a class-rebalancing experiment (skips labels
    # with more than 150 samples) — candidate for deletion.
    #     for lb_id in range(num_classes):
    #         if lbs.count(lb_id) > 150:
    #             continue
    #         targets_only.append(lb_id)
    #     print(targets_only)

    # Transfer mode: load a checkpoint from a previous NSML session,
    # immediately re-save it under this session, and exit without training.
    if config.transfer:
        # nsml.load(checkpoint='transfer', session='team_286/4_cls_food/89')
        nsml.load(checkpoint='100', session='team_286/4_cls_food/103')  # cv=1 cutmix 0.5
        # nsml.load(checkpoint='55', session='team_286/7_icls_face/2')
        # nsml.load(checkpoint='transfer', session='team_286/8_iret_food/12')
        # nsml.load(checkpoint='20', session='team_286/9_iret_car/16')
        nsml.save('resave')
        sys.exit(0)

    tr_loader, val_loader, val_label = data_loader_with_split(root=TRAIN_DATASET_PATH, cv_ratio=config.ratio, cv=config.cv, batch_size=C.get()['batch'])
    time_ = datetime.datetime.now()
    best_val_top1 = 0

    # Manual iterator over the train loader; presumably re-created when
    # exhausted further down (outside this view) — verify.
    dataiter = iter(tr_loader)
    # Fixed virtual epoch length: ~100k samples per epoch regardless of
    # the actual dataset size.
    num_steps = 100000 // C.get()['batch']

    # Third-party stopwatch, imported lazily inside the function.
    from pystopwatch2 import PyStopwatch

    for epoch in range(C.get()['epochs']):
        w = PyStopwatch()
        metrics = Accumulator()
        scheduler.step()
        model.train()
        cnt = 0
        for iter_ in range(num_steps):
Exemplo n.º 5
0
    # NOTE(review): fragment — model/optimizer/scheduler and the config
    # flags used below are defined before this view.
    # NSML checkpoint hooks; paused() hands control to the platform on
    # resubmission.
    bind_nsml(model, optimizer, scheduler)
    if config.pause:
        nsml.paused(scope=locals())

    if mode == "train":
        # Outside NSML, also mirror logs to a local file and stdout.
        if not IS_ON_NSML:
            logger = logging.getLogger("ResNet")
            logger.setLevel(logging.INFO)
            fileHandler = logging.FileHandler("./test.log")
            streamHandler = logging.StreamHandler()
            logger.addHandler(fileHandler)
            logger.addHandler(streamHandler)

        tr_loader, val_loader, val_label = data_loader_with_split(
            root=TRAIN_DATASET_PATH,
            train_split=train_split,
            batch_size=batch_size)
        time_ = datetime.datetime.now()
        num_batches = len(tr_loader)

        # Baseline validation before any training.
        eval_result = local_eval(model, val_loader, val_label)

        for epoch in range(num_epochs):
            epoch_start_time_ = datetime.datetime.now()
            # NOTE(review): scheduler.step() before the optimizer steps is
            # the legacy (pre-1.1) PyTorch ordering — confirm intended.
            scheduler.step()
            model.train()
            for iter_, data in enumerate(tr_loader):
                # Loader yields (id/extra, image, label); first field unused.
                _, x, label = data
                if cuda:
                    x = x.cuda()
                    label = label.cuda()
Exemplo n.º 6
0
    # Cosine-anneal the learning rate from its initial value down to 0
    # over num_epochs scheduler steps.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                           T_max=num_epochs,
                                                           eta_min=0.)

    # NSML checkpoint hooks; paused() hands control to the platform on
    # resubmission.
    bind_nsml(model, optimizer, scheduler)
    if config.pause:
        nsml.paused(scope=locals())

    # Transfer mode: load a checkpoint from a previous NSML session,
    # re-save it under this session, and exit without training.
    if config.transfer:
        nsml.load(checkpoint='100', session='team_286/12_idet_food/41')
        nsml.save('resave')
        sys.exit(0)

    if mode == 'train':
        tr_loader, val_loader, val_label_file = data_loader_with_split(
            root=TRAIN_DATASET_PATH,
            train_split=train_split,
            batch_size=config.batch)
        time_ = datetime.datetime.now()
        num_batches = len(tr_loader)

        # Baseline validation before any training.
        local_eval(model, val_loader, val_label_file)
        best_iou = 0.
        for epoch in range(num_epochs):
            metrics = Accumulator()
            # NOTE(review): scheduler.step() before the optimizer steps is
            # the legacy (pre-1.1) PyTorch ordering — confirm intended.
            scheduler.step()
            model.train()
            cnt = 0
            for iter_, data in enumerate(tr_loader):
                x, label = data
                # Detection labels: statement truncated in this view —
                # presumably rescaling/normalizing box coordinates; verify.
                label[:, :,
                      2:] = label[:, :,