Example #1
0
def model_1(args):
    """Train the bimodal denoising autoencoder on (image, word-vector) pairs.

    For every batch, ResNet-101 features and per-class word vectors are fed
    through the bimodal DAE; the loss is the sum of both L1 reconstruction
    terms plus a cross-entropy classification term on the hidden code.

    Args:
        args: parsed CLI namespace; reads ``run_name``, ``datadir``,
            ``batch_size``, ``learning_rate`` and ``epochs``; ``img_size``
            is forced to 224 (ResNet input size).
    """
    # Create the per-run output directories (no-op when they already exist).
    for sub in ('images', 'history', 'files'):
        os.makedirs(os.path.join(os.getcwd(), 'results', sub, args.run_name),
                    exist_ok=True)

    datapath = args.datadir
    args.img_size = 224
    dataset, data_loader = utils.get_dataset(datapath, args.img_size,
                                             args.batch_size)
    classes, class_to_idx, idx_to_class = utils.get_classes(dataset)
    word_dim = 300
    label_criterion = nn.CrossEntropyLoss()
    reconstr_criterion = nn.L1Loss()

    model = BimodalDAEImage(300, 2048, n_classes=len(classes))
    cnn = resnet101(pretrained=True)
    # The CNN is a frozen feature extractor: eval() keeps batch-norm on its
    # pretrained running statistics instead of per-batch statistics.
    cnn.eval()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=1e-5)
    print('\nNum classes: %r, num images: %r' % (len(classes), len(dataset)))

    word_vecs = utils.get_word_vectors(os.getcwd() + '/data/files/wvecs.json',
                                       classes, word_dim)

    for epoch in range(args.epochs):
        print('Epoch %r' % epoch)
        log.info('Epoch %r' % epoch)
        for batch_idx, (img, target_tensor) in enumerate(data_loader):
            target_idxs = target_tensor.data.numpy().tolist()
            target_names = [idx_to_class[idx] for idx in target_idxs]

            # Per-sample word vectors (textual modality target).
            target_textual = torch.tensor(
                [word_vecs[name] for name in target_names],
                dtype=torch.float32)

            # CNN features (visual modality target). No gradients are needed
            # through the frozen extractor; the numpy round-trip already
            # detaches, but no_grad() avoids building the graph at all.
            with torch.no_grad():
                target_visual = torch.tensor([
                    cnn.forward(img[idx].reshape(1, 3, args.img_size,
                                                 args.img_size)).data.numpy()
                    for idx in range(len(target_idxs))
                ], dtype=torch.float32)

            optimizer.zero_grad()

            img_reconstr, text_reconstr, hidden = model.forward(target_visual,
                                                                target_textual)
            textual_loss = reconstr_criterion(text_reconstr, target_textual)
            visual_loss = reconstr_criterion(img_reconstr, target_visual)
            # CrossEntropyLoss applies log-softmax internally, so it must
            # receive the raw hidden logits (the previous explicit softmax
            # double-normalized the scores and flattened the gradients).
            pred_loss = label_criterion(hidden, target_tensor)

            # One backward pass over the summed loss replaces three separate
            # backward(retain_graph=True) calls — same gradients, less work.
            (textual_loss + visual_loss + pred_loss).backward()
            optimizer.step()

        # Checkpoint once per qualifying epoch (previously this ran inside
        # the batch loop, rewriting the file after every batch).
        if epoch % 10 == 0:
            state = {'epoch': epoch + 1,
                     'state_dict': model.state_dict(),
                     'optimizer': optimizer.state_dict()}
            torch.save(state,
                       os.getcwd() + "/model_states/" + args.run_name)

    return
Example #2
0
def model_1(args):
    """Run the bimodal DAE over the dataset and dump hidden encodings to JSON.

    For each image, ResNet-101 features plus the class word vector are pushed
    through the DAE; the hidden representation is collected per class name and
    written to ``results/files/<run_name>/encoding_dict.json``.

    Args:
        args: parsed CLI namespace; reads ``run_name``, ``datadir`` and
            ``batch_size``; ``img_size`` is forced to 224 (ResNet input size).

    NOTE(review): no checkpoint is loaded here, so ``model`` runs with its
    freshly initialised weights — confirm a state-load step isn't missing.
    """
    # Create the per-run output directories (no-op when they already exist).
    for sub in ('images', 'history', 'files'):
        os.makedirs(os.path.join(os.getcwd(), 'results', sub, args.run_name),
                    exist_ok=True)

    datapath = args.datadir
    args.img_size = 224

    dataset, data_loader = utils.get_dataset(datapath, args.img_size,
                                             args.batch_size)
    classes, class_to_idx, idx_to_class = utils.get_classes(dataset)
    word_dim = 300

    model = BimodalDAEImage(300, 2048, n_classes=len(classes))
    cnn = resnet101(pretrained=True)
    # Pure inference: disable dropout and use batch-norm running statistics.
    model.eval()
    cnn.eval()
    print('\nNum classes: %r, num images: %r' % (len(classes), len(dataset)))

    word_vecs = utils.get_wvecs_json(os.getcwd() + '/data/files/wvecs.json',
                                     classes, word_dim)

    encoding_dict = {}
    with torch.no_grad():
        for batch_idx, (img, target_tensor) in enumerate(data_loader):
            target_idxs = target_tensor.data.numpy().tolist()
            target_names = [idx_to_class[idx] for idx in target_idxs]

            # Per-sample word vectors (textual modality input).
            target_textual = torch.tensor(
                [word_vecs[name] for name in target_names],
                dtype=torch.float32)

            # CNN features (visual modality input).
            target_visual = torch.tensor(
                [cnn.forward(img[idx].reshape(1, 3, args.img_size,
                                              args.img_size)).data.numpy()
                 for idx in range(len(target_idxs))],
                dtype=torch.float32)

            img_reconstr, text_reconstr, hidden = model.forward(target_visual,
                                                                target_textual)

            print('Hidden', hidden.size())
            reps = hidden.data.numpy()
            for idx in range(len(reps)):
                target = target_names[idx]
                print(target)
                encoding_dict.setdefault(target, []).append(reps[idx].tolist())

    # Write the encodings; the context manager closes (and flushes) the file,
    # which the original open(..., 'w+') without close() did not guarantee.
    out_path = os.path.join(os.getcwd(), 'results', 'files', args.run_name,
                            'encoding_dict.json')
    with open(out_path, 'w') as f:
        json.dump(encoding_dict, f)
    print('Eval done!')