Exemplo n.º 1
0
def valid(epoch):
    """Run one validation pass with `net_running` and report mean batch accuracy.

    Also feeds the accuracy to `scheduler.step` when a scheduler is configured.
    """
    clevr = CLEVR(cfg.DATALOADER.FEATURES_PATH, 'val', transform=None)
    loader = DataLoader(
        clevr,
        batch_size=cfg.DATALOADER.BATCH_SIZE,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        collate_fn=collate_data,
        drop_last=True,
    )
    batches = iter(loader)

    net_running.train(False)
    with torch.no_grad():
        acc_sum = 0

        for image, question, q_len, answer, _, _ in tqdm(batches):
            image, question = image.to(device), question.to(device)

            output = net_running(image, question, q_len)
            hits = output.detach().argmax(1) == answer.to(device)
            acc_sum += hits.float().mean().item()

        # Mean of per-batch accuracies; batches are equal-sized (drop_last=True).
        if scheduler:
            scheduler.step(acc_sum / len(batches))

        print('Avg Acc: {:.5f}'.format(acc_sum / len(batches)))

    clevr.close()
Exemplo n.º 2
0
def valid(accum_net, clevr_dir, epoch):
    """Evaluate `accum_net` on the CLEVR val split; log per-family accuracy."""
    clevr = CLEVR(clevr_dir, "val", transform=None)
    loader = DataLoader(
        clevr, batch_size=batch_size, num_workers=4, collate_fn=collate_data
    )

    accum_net.train(False)
    n_right = Counter()
    n_seen = Counter()
    with torch.no_grad():
        for image, question, q_len, answer, family in tqdm(iter(loader)):
            image, question = image.to(device), question.to(device)

            pred = accum_net(image, question, q_len).detach().argmax(1)
            hits = pred == answer.to(device)
            for hit, fam in zip(hits, family):
                n_seen[fam] += 1
                if hit:
                    n_right[fam] += 1

    # One log file per epoch, zero-padded (log_01.txt, log_02.txt, ...).
    with open("log/log_{}.txt".format(str(epoch + 1).zfill(2)), "w") as w:
        for fam, total in n_seen.items():
            w.write("{}: {:.5f}\n".format(fam, n_right[fam] / total))

    print("Avg Acc: {:.5f}".format(
        sum(n_right.values()) / sum(n_seen.values())))

    clevr.close()
Exemplo n.º 3
0
def valid(epoch):
    """Validate `net_running` on CLEVR val; log per-family accuracy to file."""
    clevr = CLEVR(sys.argv[1], 'val', transform=None)
    loader = DataLoader(
        clevr, batch_size=batch_size, num_workers=4, collate_fn=collate_data
    )

    net_running.train(False)
    n_right = Counter()
    n_seen = Counter()
    with torch.no_grad():
        for image, question, q_len, answer, family in tqdm(iter(loader)):
            image, question = image.to(device), question.to(device)

            pred = net_running(image, question, q_len).detach().argmax(1)
            hits = pred == answer.to(device)
            for hit, fam in zip(hits, family):
                n_seen[fam] += 1
                if hit:
                    n_right[fam] += 1

    # One log file per epoch, zero-padded (log_01.txt, log_02.txt, ...).
    with open('log/log_{}.txt'.format(str(epoch + 1).zfill(2)), 'w') as w:
        for fam, total in n_seen.items():
            w.write('{}: {:.5f}\n'.format(fam, n_right[fam] / total))

    print('Avg Acc: {:.5f}'.format(
        sum(n_right.values()) / sum(n_seen.values())))

    clevr.close()
Exemplo n.º 4
0
def train(epoch, dataset_type):
    """Train `net` for one epoch on CLEVR or GQA.

    Args:
        epoch: zero-based epoch index (only used for progress display).
        dataset_type: "CLEVR" selects the CLEVR dataset, anything else GQA.

    Returns:
        Tuple of (mean loss per sample, overall accuracy) for the epoch.

    Fixes:
        - builtin `sum(correct)` iterated the tensor element by element;
          `correct.sum()` reduces on-device in one call;
        - the per-batch accuracy shown in the progress bar divided by the
          fixed BATCH_SIZE although the loader has no drop_last, which
          understated accuracy on a short final batch; `.mean()` divides
          by the actual batch size.
    """
    root = args.root
    if dataset_type == "CLEVR":
        dataset_object = CLEVR(root, transform=transform)
    else:
        dataset_object = GQA(root, transform=transform)

    train_set = DataLoader(dataset_object,
                           batch_size=BATCH_SIZE,
                           num_workers=multiprocessing.cpu_count(),
                           collate_fn=collate_data)

    pbar = tqdm(iter(train_set))
    running_loss = 0
    correct_counts = 0
    total_counts = 0

    net.train()
    for image, question, q_len, answer in pbar:
        image, question, answer = (
            image.to(DEVICE),
            question.to(DEVICE),
            answer.to(DEVICE),
        )
        net.zero_grad()
        output = net(image, question, q_len)
        loss = criterion(output, answer)
        loss.backward()
        optimizer.step()

        correct = output.detach().argmax(1) == answer
        correct_counts += correct.sum().item()
        total_counts += image.size(0)

        # mean() divides by the true batch size (robust to a short last batch)
        batch_acc = correct.float().mean().item()
        running_loss += loss.item() / BATCH_SIZE

        pbar.set_description(
            '[Training] Epoch: {}; Loss: {:.8f}; Acc: {:.5f}'.format(
                epoch + 1, loss.item(), batch_acc))

    print('[Training] loss: {:8f}, accuracy: {:5f}'.format(
        running_loss / len(train_set.dataset), correct_counts / total_counts))
    dataset_object.close()
    return running_loss / len(train_set.dataset), correct_counts / total_counts
Exemplo n.º 5
0
def train(net, accum_net, optimizer, criterion, clevr_dir, epoch):
    """Train `net` for one epoch on CLEVR, accumulating weights into `accum_net`.

    Fix: `torch.tensor(correct, dtype=...)` on an existing tensor copies it
    and emits a UserWarning on recent PyTorch; `.float().mean()` computes the
    same batch accuracy directly (and divides by the true size of a short
    final batch, since the loader has no drop_last).
    """
    clevr = CLEVR(clevr_dir, transform=transform)
    train_set = DataLoader(clevr,
                           batch_size=batch_size,
                           num_workers=4,
                           collate_fn=collate_data)

    dataset = iter(train_set)
    pbar = tqdm(dataset)
    moving_loss = 0  # despite the name, this tracks an EMA of batch accuracy

    net.train(True)
    for i, (image, question, q_len, answer, _) in enumerate(pbar):
        image, question, answer = (image.to(device), question.to(device),
                                   answer.to(device))

        net.zero_grad()
        output = net(image, question, q_len)
        loss = criterion(output, answer)
        loss.backward()

        # if wrapped in a DataParallel, the actual net is at DataParallel.module
        m = net.module if isinstance(net, nn.DataParallel) else net
        torch.nn.utils.clip_grad_norm_(m.mac.read.parameters(), 1)

        optimizer.step()
        correct = (output.detach().argmax(1) == answer).float().mean().item()

        if moving_loss == 0:
            moving_loss = correct
        else:
            moving_loss = moving_loss * 0.99 + correct * 0.01

        pbar.set_description("Epoch: {}; Loss: {:.5f}; Acc: {:.5f}".format(
            epoch + 1, loss.item(), moving_loss))

        accumulate(accum_net, net)

    clevr.close()
Exemplo n.º 6
0
def test(accum_net, clevr_dir):
    """Evaluate `accum_net` on the CLEVR val split; write per-family accuracy.

    Fix: a leftover debugging block printed the saved attention shapes and
    then called sys.exit() unconditionally inside the first batch, which
    killed the process and made all accuracy bookkeeping below unreachable.
    The debug block has been removed so the test actually runs to completion.
    """
    print("Starting tests!")
    print(accum_net)
    clevr = CLEVR(clevr_dir, "val", transform=None)
    test_set = DataLoader(clevr,
                          batch_size=batch_size,
                          num_workers=4,
                          collate_fn=collate_data)
    dataset = iter(test_set)

    accum_net.train(False)
    family_correct = Counter()
    family_total = Counter()
    with torch.no_grad():
        for image, question, q_len, answer, family in tqdm(dataset):
            image, question = image.to(device), question.to(device)

            output = accum_net(image, question, q_len)

            correct = output.detach().argmax(1) == answer.to(device)
            for c, fam in zip(correct, family):
                if c:
                    family_correct[fam] += 1
                family_total[fam] += 1

    with open("log/test_log.txt", "w") as w:
        for k, v in family_total.items():
            w.write("{}: {:.5f}\n".format(k, family_correct[k] / v))

    print("Avg Acc: {:.5f}".format(
        sum(family_correct.values()) / sum(family_total.values())))

    clevr.close()
Exemplo n.º 7
0
def valid(epoch, dataset_type):
    """Validate `net` on CLEVR or GQA.

    Returns (mean loss per sample, overall accuracy) for the pass.
    """
    root = args.root
    if dataset_type == "CLEVR":
        dataset_object = CLEVR(root, 'val', transform=None)
    else:
        dataset_object = GQA(root, 'val', transform=None)

    valid_set = DataLoader(
        dataset_object,
        batch_size=BATCH_SIZE,
        num_workers=multiprocessing.cpu_count(),
        collate_fn=collate_data,
    )

    net.eval()
    hits = 0
    seen = 0
    loss_sum = 0.0
    with torch.no_grad():
        pbar = tqdm(iter(valid_set))
        for image, question, q_len, answer in pbar:
            image = image.to(DEVICE)
            question = question.to(DEVICE)
            answer = answer.to(DEVICE)

            output = net(image, question, q_len)
            loss = criterion(output, answer)
            hits += sum(output.detach().argmax(1) == answer).item()
            seen += image.size(0)
            loss_sum += loss.item() / BATCH_SIZE

            pbar.set_description(
                '[Val] Epoch: {}; Loss: {:.8f}; Acc: {:.5f}'.format(
                    epoch + 1, loss.item(), hits / seen))

    print('[Val] loss: {:8f}, accuracy: {:5f}'.format(
        loss_sum / len(valid_set.dataset), hits / seen))

    dataset_object.close()
    return loss_sum / len(valid_set.dataset), hits / seen
Exemplo n.º 8
0
def train(epoch):
    """One training epoch for `net`; weights accumulated into `net_running`.

    Fix: `torch.tensor(correct, dtype=...)` copies an existing tensor and
    emits a UserWarning on recent PyTorch; `.float().mean()` gives the same
    batch accuracy directly (and is correct for a short final batch too).
    """
    clevr = CLEVR(sys.argv[1], transform=transform)
    train_set = DataLoader(
        clevr, batch_size=batch_size, num_workers=4, collate_fn=collate_data
    )

    dataset = iter(train_set)
    pbar = tqdm(dataset)
    moving_loss = 0  # despite the name, an EMA of batch accuracy

    net.train(True)
    for image, question, q_len, answer, _ in pbar:
        image, question, answer = (
            image.to(device),
            question.to(device),
            answer.to(device),
        )

        net.zero_grad()
        output = net(image, question, q_len)
        loss = criterion(output, answer)
        loss.backward()
        optimizer.step()
        correct = (output.detach().argmax(1) == answer).float().mean().item()

        if moving_loss == 0:
            moving_loss = correct
        else:
            moving_loss = moving_loss * 0.99 + correct * 0.01

        pbar.set_description(
            'Epoch: {}; Loss: {:.5f}; Acc: {:.5f}'.format(
                epoch + 1, loss.item(), moving_loss
            )
        )

        accumulate(net_running, net)

    clevr.close()
Exemplo n.º 9
0
def train(epoch):
    """One training epoch for `net`; an EMA of weights is kept in `net_running`."""
    clevr = CLEVR(cfg.DATALOADER.FEATURES_PATH, transform=transform)
    loader = DataLoader(
        clevr,
        batch_size=cfg.DATALOADER.BATCH_SIZE,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        collate_fn=collate_data,
        drop_last=True,
    )

    pbar = tqdm(iter(loader))
    moving_loss = 0

    net.train(True)
    for image, question, q_len, answer, _, _ in pbar:
        image = image.to(device)
        question = question.to(device)
        answer = answer.to(device)

        net.zero_grad()
        output = net(image, question, q_len)
        loss = criterion(output, answer)
        loss.backward()
        if cfg.SOLVER.GRAD_CLIP:
            nn.utils.clip_grad_norm_(net.parameters(), cfg.SOLVER.GRAD_CLIP)
        optimizer.step()

        accuracy = (output.detach().argmax(1) == answer).float().mean().item()

        # Exponential moving average of batch accuracy, seeded on first batch.
        if moving_loss == 0:
            moving_loss = accuracy
        else:
            moving_loss = moving_loss * 0.99 + accuracy * 0.01

        pbar.set_description(
            'Epoch: {}; Loss: {:.5f}; Acc: {:.5f}'.format(
                epoch, loss.item(), moving_loss
            )
        )
        accumulate(net_running, net)

    clevr.close()
Exemplo n.º 10
0
def valid(epoch, dataset_type):
    """One validation pass over CLEVR or GQA; logs overall accuracy to a file.

    Fixes:
        - the summary line printed 'Training loss' from inside the validation
          routine; it now reports 'Validation loss';
        - builtin `sum(correct)` iterated the tensor element by element;
          `correct.sum()` reduces on-device in one call.
    """
    if dataset_type == "CLEVR":
        dataset_object = CLEVR('data/CLEVR_v1.0', 'val', transform=None)
    else:
        dataset_object = GQA('data/gqa', 'val', transform=None)

    valid_set = DataLoader(dataset_object, batch_size=BATCH_SIZE, num_workers=multiprocessing.cpu_count(),
                           collate_fn=collate_data)
    dataset = iter(valid_set)

    net.eval()
    correct_counts = 0
    total_counts = 0
    running_loss = 0.0
    with torch.no_grad():
        pbar = tqdm(dataset)
        for image, question, q_len, answer in pbar:
            image, question, answer = (
                image.to(DEVICE),
                question.to(DEVICE),
                answer.to(DEVICE),
            )

            output = net(image, question, q_len)
            loss = criterion(output, answer)
            correct = output.detach().argmax(1) == answer
            correct_counts += correct.sum().item()
            total_counts += image.size(0)
            running_loss += loss.item() / BATCH_SIZE

            pbar.set_description(
                'Epoch: {}; Loss: {:.8f}; Acc: {:.5f}'.format(epoch + 1, loss.item(), correct_counts / total_counts))

    with open('log/log_{}.txt'.format(str(epoch + 1).zfill(2)), 'w') as w:
        w.write('{:.5f}\n'.format(correct_counts / total_counts))

    print('Validation loss: {:8f}, accuracy: {:5f}'.format(running_loss / len(valid_set.dataset),
                                                           correct_counts / total_counts))

    dataset_object.close()
Exemplo n.º 11
0
def train(epoch):
    """One training epoch for `relnet` on preprocessed CLEVR."""
    train_set = DataLoader(
        CLEVR(
            sys.argv[1],
            transform=transform,
            reverse_question=reverse_question,
            use_preprocessed=True,
        ),
        batch_size=batch_size,
        num_workers=n_worker,
        shuffle=True,
        collate_fn=collate_data,
    )

    pbar = tqdm(iter(train_set))
    moving_loss = 0

    relnet.train(True)
    for i, (image, question, q_len, answer, _) in enumerate(pbar):
        image = image.to(device)
        question = question.to(device)
        q_len = torch.tensor(q_len)
        answer = answer.to(device)

        relnet.zero_grad()
        output = relnet(image, question, q_len)
        loss = criterion(output, answer)
        loss.backward()
        nn.utils.clip_grad_norm_(relnet.parameters(), clip_norm)
        optimizer.step()

        # fraction of this batch answered correctly
        batch_hits = output.data.cpu().numpy().argmax(
            1) == answer.data.cpu().numpy()
        batch_acc = batch_hits.sum() / batch_size

        # EMA of batch accuracy, seeded on the first batch
        if moving_loss == 0:
            moving_loss = batch_acc
        else:
            moving_loss = moving_loss * 0.99 + batch_acc * 0.01

        pbar.set_description(
            'Epoch: {}; Loss: {:.5f}; Acc: {:.5f}; LR: {:.6f}'.format(
                epoch + 1,
                loss.detach().item(),
                moving_loss,
                optimizer.param_groups[0]['lr'],
            ))
Exemplo n.º 12
0
def valid(epoch):
    """Validate `relnet` for one epoch; append loss/accuracy to logs_valid.csv.

    Fixes:
        - the model was left in training mode (relnet.train(True)) and no
          torch.no_grad() was used, so dropout/batchnorm behaved as in
          training and autograd built graphs during validation; now uses
          eval() + no_grad();
        - `loss.data[0]` raises IndexError for 0-dim tensors on
          PyTorch >= 0.5; replaced with `loss.item()`.
    """
    valid_set = DataLoader(CLEVR(args.data_dir, args.segs_dir, 'val'),
                           batch_size=args.batch_size,
                           shuffle=True,
                           num_workers=16,
                           collate_fn=collate_data,
                           pin_memory=args.cuda)

    relnet.eval()
    avg_loss = 0
    avg_acc = 0
    with torch.no_grad():
        for step, (apps, masks, num_layers, question, q_len, answer,
                   family) in enumerate(valid_set):
            q_len = torch.LongTensor(np.array(q_len))
            num_layers = torch.LongTensor(np.array(num_layers))
            # Variable() is a harmless no-op wrapper on modern PyTorch; kept
            # for compatibility with the rest of this file.
            if args.cuda:
                apps, masks, num_layers, question, answer, q_len = \
                Variable(apps).cuda(), Variable(masks).cuda(), Variable(num_layers).cuda(), Variable(question).cuda(), Variable(answer).cuda(), Variable(q_len).cuda()
            else:
                apps, masks, num_layers, question, answer, q_len = \
                Variable(apps), Variable(masks), Variable(num_layers), Variable(question), Variable(answer), Variable(q_len)

            output = relnet(apps, masks, num_layers, question, q_len)
            pred_answer = output.data.cpu().numpy().argmax(1)
            accuracy = np.mean(answer.data.cpu().numpy() == pred_answer)

            avg_acc += accuracy
            loss = torch.sum(criterion(output, answer))
            avg_loss += loss.item()

            if step % args.log_interval == 0:
                print('Epoch: {}; Step: {:d}; Loss: {:.5f}; Avg_Accuracy: {:.5f}'.
                      format(epoch, step, loss.item(), avg_acc / (step + 1)))

    with open('logs_valid.csv', 'a') as csvfile_valid:
        fieldnames_valid = ['epoch', 'valid_loss', 'valid_acc']
        writer_valid = csv.DictWriter(csvfile_valid,
                                      fieldnames=fieldnames_valid)
        writer_valid.writerow({
            'epoch': epoch,
            'valid_loss': avg_loss / (step + 1),
            'valid_acc': avg_acc / (step + 1)
        })

    print('Epoch: {:d}; Avg Acc: {:.5f}'.format(epoch, avg_acc / (step + 1)))
Exemplo n.º 13
0
def valid(epoch):
    """Validate `relnet` on CLEVR val; log per-class and total accuracy."""
    valid_set = DataLoader(
        CLEVR(
            sys.argv[1],
            'val',
            transform=None,
            reverse_question=reverse_question,
            use_preprocessed=True,
        ),
        batch_size=batch_size // 2,
        num_workers=4,
        collate_fn=collate_data,
    )

    relnet.eval()
    class_correct = Counter()
    class_total = Counter()

    with torch.no_grad():
        for image, question, q_len, answer, answer_class in tqdm(iter(valid_set)):
            image = image.to(device)
            question = question.to(device)
            q_len = torch.tensor(q_len)

            output = relnet(image, question, q_len)
            hits = output.data.cpu().numpy().argmax(1) == answer.numpy()
            for hit, cls in zip(hits, answer_class):
                class_total[cls] += 1
                if hit:
                    class_correct[cls] += 1

    # Append an overall entry so it appears in the per-class log as well.
    class_correct['total'] = sum(class_correct.values())
    class_total['total'] = sum(class_total.values())

    with open('log/log_{}.txt'.format(str(epoch + 1).zfill(3)), 'w') as w:
        for cls, total in class_total.items():
            w.write('{}: {:.5f}\n'.format(cls, class_correct[cls] / total))

    print('Avg Acc: {:.5f}'.format(class_correct['total'] /
                                   class_total['total']))
Exemplo n.º 14
0
import sys
import pickle

from tqdm import tqdm
from torch.utils.data import DataLoader
from dataset import CLEVR, collate_data, transform
from model import RelationNetworks

batch_size = 64
n_epoch = 180

train_set = DataLoader(CLEVR(sys.argv[1], transform=transform),
                       batch_size=batch_size,
                       num_workers=4)

for epoch in range(n_epoch):
    dataset = iter(train_set)
    pbar = tqdm(dataset)

    for image, question, q_len, answer in pbar:
        pass
Exemplo n.º 15
0
import sys
import pickle
from collections import Counter

import torch
from tqdm import tqdm
from torch.utils.data import DataLoader
from dataset import CLEVR, collate_data, transform

batch_size = 64
n_epoch = 180

# Evaluation loader over the CLEVR 'val' split (no image transform).
# NOTE(review): `pickle` and `transform` are imported but unused here.
train_set = DataLoader(
    CLEVR(sys.argv[1], 'val', transform=None),
    batch_size=batch_size,
    num_workers=4,
    collate_fn=collate_data,
)
# sys.argv[2] is a serialized model checkpoint; switch it to eval mode.
net = torch.load(sys.argv[2])
net.eval()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for epoch in range(n_epoch):
    dataset = iter(train_set)
    pbar = tqdm(dataset)
    family_correct = Counter()
    family_total = Counter()

    # NOTE(review): the visible snippet ends after moving the batch to the
    # device — the forward pass / accuracy bookkeeping presumably follows.
    for image, question, q_len, answer, family in pbar:
        image, question = image.to(device), question.to(device)
Exemplo n.º 16
0
def train(epoch):
    """Train `relnet` for one epoch; append loss/accuracy to logs_train.csv.

    Fix: `loss.data[0]` raises IndexError for 0-dim tensors on
    PyTorch >= 0.5; replaced with a single `loss.item()` call per batch.
    """
    train_set = DataLoader(CLEVR(args.data_dir, args.segs_dir, 'train'),
                           batch_size=args.batch_size,
                           shuffle=True,
                           num_workers=16,
                           collate_fn=collate_data,
                           pin_memory=args.cuda)

    moving_loss = 0

    relnet.train(True)
    avg_loss = 0
    avg_acc = 0
    for step, (apps, masks, num_layers, question, q_len, answer,
               family) in enumerate(train_set):
        start_time = time.time()
        q_len = torch.LongTensor(np.array(q_len))
        num_layers = torch.LongTensor(np.array(num_layers))
        # Variable() is a harmless no-op wrapper on modern PyTorch; kept for
        # compatibility with the rest of this file.
        if args.cuda:
            apps, masks, num_layers, question, answer, q_len = \
            Variable(apps).cuda(), Variable(masks).cuda(), Variable(num_layers).cuda(),  Variable(question).cuda(), Variable(answer).cuda(), Variable(q_len).cuda()

        else:
            apps, masks, num_layers, question, answer, q_len = \
            Variable(apps), Variable(masks), Variable(num_layers), Variable(question), Variable(answer), Variable(q_len)

        relnet.zero_grad()
        output = relnet(apps, masks, num_layers, question, q_len)

        pred_answer = output.data.cpu().numpy().argmax(1)
        accuracy = np.mean(answer.data.cpu().numpy() == pred_answer)

        avg_acc += accuracy
        loss = torch.sum(criterion(output, answer))
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()  # .data[0] breaks on modern PyTorch

        if moving_loss == 0:
            moving_loss = batch_loss
        else:
            moving_loss = moving_loss * 0.9 + batch_loss * 0.1

        avg_loss += batch_loss
        exm_per_sec = args.batch_size / (time.time() - start_time)
        if step % args.log_interval == 0:
            print(
                '{}; Epoch: {}; Step: {:d}; Loss: {:.5f}; Avg: {:.5f}; Avg_Accuracy: {:.5f}; Example/sec: {:.5f}'
                .format(datetime.datetime.now(), epoch, step, batch_loss,
                        avg_loss / (step + 1), avg_acc / (step + 1),
                        exm_per_sec))

    with open('logs_train.csv', 'a') as csvfile_train:
        fieldnames_train = ['epoch', 'train_loss', 'train_acc']
        writer_train = csv.DictWriter(csvfile_train,
                                      fieldnames=fieldnames_train)
        writer_train.writerow({
            'epoch': epoch,
            'train_loss': avg_loss / (step + 1),
            'train_acc': avg_acc / (step + 1)
        })
Exemplo n.º 17
0
import sys
import pickle
from collections import Counter

import torch
from tqdm import tqdm
from torch.utils.data import DataLoader
from dataset import CLEVR, collate_data, transform

batch_size = 64
n_epoch = 180

# Evaluation loader over the CLEVR "val" split (no image transform).
# NOTE(review): `pickle` and `transform` are imported but unused here.
train_set = DataLoader(
    CLEVR(sys.argv[1], "val", transform=None),
    batch_size=batch_size,
    num_workers=4,
    collate_fn=collate_data,
)
# sys.argv[2] is a serialized model checkpoint; switch it to eval mode.
net = torch.load(sys.argv[2])
net.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for epoch in range(n_epoch):
    dataset = iter(train_set)
    pbar = tqdm(dataset)
    family_correct = Counter()
    family_total = Counter()

    # NOTE(review): the visible snippet ends after moving the batch to the
    # device — the forward pass / accuracy bookkeeping presumably follows.
    for image, question, q_len, answer, family in pbar:
        image, question = image.to(device), question.to(device)