Example #1
def main():
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    nclasses = 10
    epochs = args.epochs
    learning_rate = 0.1
    model_name = args.model_name

    trainloader, testloader = data_loader(model_name)
    model = cnn_model.CNNNet(model_name,
                             nclasses=nclasses,
                             pretrained=args.pretrained)
    model = model.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    record = {}
    record['train loss'] = []
    record['train acc'] = []
    record['test loss'] = []
    record['test acc'] = []
    record['epoch time'] = []
    record['batch size'] = args.batch_size
    record['base lr'] = learning_rate
    record['pretrain'] = args.pretrained
    record['filename'] = 'log/' + model_name + '_' + datetime.now().strftime(
        "%y-%m-%d-%H-%M-%S") + '.pkl'
    record['description'] = 'Model: base model\n'

    for epoch in range(epochs):
        epoch_time = time.time()
        train_loss, train_acc = train(trainloader, model, criterion, optimizer)
        train_time = time.time() - epoch_time
        print(
            '[Epoch: %3d/%3d][Train Loss: %5.5f][Train Acc: %5.5f][Epoch Time: %3.3f]'
            % (epoch, epochs, train_loss, train_acc, train_time))
        test_loss, test_acc = test(testloader, model, criterion)
        print('[Epoch: %3d/%3d][Test Loss: %5.5f][Test Acc: %5.5f]' %
              (epoch, epochs, test_loss, test_acc))
        optimizer, learning_rate = adjust_learning_rate(
            optimizer, learning_rate, epoch)

        record['epoch time'].append(train_time)
        record['train loss'].append(train_loss)
        record['train acc'].append(train_acc)
        record['test loss'].append(test_loss)
        record['test acc'].append(test_acc)

    with open(record['filename'], 'wb') as fp:
        pickle.dump(record, fp)
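
Example #1 assumes module-level imports (os, time, pickle, datetime.datetime, torch.nn/optim) plus an argparse `args` namespace, and it calls an adjust_learning_rate helper that is not shown. A minimal sketch of that helper, assuming a plain step decay; the factor and interval here are illustrative, not the project's actual schedule:

def adjust_learning_rate(optimizer, learning_rate, epoch, decay=0.1, step=30):
    # Multiply the learning rate by `decay` every `step` epochs (assumed schedule).
    if epoch > 0 and epoch % step == 0:
        learning_rate *= decay
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate
    return optimizer, learning_rate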
Example #2
def create_dataset(dataset_name):
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transformations = transforms.Compose([
        transforms.Resize(256),  # transforms.Scale was removed from torchvision
        transforms.CenterCrop(224),
        transforms.ToTensor(), normalize
    ])
    if dataset_name == 'NUS-WIDE':
        dset_test = dp.DatasetProcessingNUS_WIDE('data/NUS-WIDE',
                                                 'test_img.txt',
                                                 transformations)
        return dset_test
    if dataset_name == 'CIFAR-10':
        dset_test = dp.DatasetProcessingCIFAR_10('data/CIFAR-10',
                                                 'test_img.txt',
                                                 transformations)
        return dset_test
    if dataset_name == 'Project':
        if not os.path.exists('dcodes/adch-project-48bits-record.pkl'):
            record = {}
            dset_database = dp.DatasetProcessingPorject(
                'data/Project', 'database_img.txt', transformations)
            databaseloader = DataLoader(dset_database,
                                        batch_size=1,
                                        shuffle=False,
                                        num_workers=4)
            model = cnn_model.CNNNet('resnet50', 48)
            model.load_state_dict(
                torch.load('dict/adch-nuswide-48bits.pth',
                           map_location=torch.device('cpu')))
            model.eval()
            rB = encode(model, databaseloader, 4985, 48)
            record['rB'] = rB
            with open('dcodes/adch-project-48bits-record.pkl', 'wb') as fp:
                pickle.dump(record, fp)
        dset_test = dp.DatasetProcessingPorject('data/Project', 'test_img.txt',
                                                transformations)
        return dset_test
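
Examples #2 through #4 call an encode helper that is not defined in this section. A minimal sketch, assuming each dataset yields (input, label, index) tuples as the training loops do; the real implementation may differ (Example #8's text variant also takes an istxt flag that this sketch omits):

import numpy as np
import torch

def encode(model, data_loader, num, code_length):
    # Binarize model outputs into a num x code_length matrix of +/-1 codes.
    B = np.zeros((num, code_length), dtype=np.float32)
    device = next(model.parameters()).device
    with torch.no_grad():
        for data_input, _, data_ind in data_loader:
            output = model(data_input.to(device))
            B[data_ind.numpy(), :] = torch.sign(output).cpu().numpy()
    return B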
Example #3
def DCDH_algo(code_length):
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)
    '''
    parameter setting
    '''
    max_iter = opt.max_iter
    epochs = opt.epochs
    batch_size = opt.batch_size
    learning_rate = opt.learning_rate
    weight_decay = 5 * 10**-4
    num_samples = opt.num_samples
    gamma = opt.gamma

    record['param']['topk'] = 5000
    record['param']['opt'] = opt
    record['param']['description'] = '[Comment: learning rate decay]'
    logger.info(opt)
    logger.info(code_length)
    logger.info(record['param']['description'])
    '''
    dataset preprocessing
    '''
    nums, dsets, labels = _dataset()
    num_database, num_test = nums
    dset_database, dset_test = dsets
    database_labels, test_labels = labels
    '''
    model construction
    '''
    model = cnn_model.CNNNet(opt.arch, code_length)
    model.cuda()
    DCDH_loss = al.DCDHLoss(gamma, code_length, num_database)
    optimizer = optim.SGD(model.parameters(),
                          lr=learning_rate,
                          weight_decay=weight_decay)

    labelNet = cnn_model.MLP(code_length, 38)
    labelNet.cuda()
    label_loss = al.DCDHLoss(gamma, code_length, num_database)
    # fixed: the original passed model.parameters() here, duplicating `optimizer`
    optimizer2 = optim.SGD(labelNet.parameters(),
                           lr=learning_rate,
                           weight_decay=weight_decay)

    product_model = cnn_model.ConcatMLP(code_length, 38)
    product_model.cuda()
    product_loss = al.ProductLoss(gamma, code_length, num_samples)
    optimizer3 = optim.Adam(product_model.parameters(),
                            lr=learning_rate,
                            weight_decay=weight_decay)

    V = np.zeros((num_database, code_length))

    model.train()
    labelNet.train()
    product_model.train()

    for iter in range(max_iter):
        iter_time = time.time()
        '''
        sampling and construct similarity matrix
        '''
        select_index = list(np.random.permutation(
            range(num_database)))[0:num_samples]
        _sampler = subsetsampler.SubsetSampler(select_index)
        trainloader = DataLoader(dset_database,
                                 batch_size=batch_size,
                                 sampler=_sampler,
                                 shuffle=False,
                                 num_workers=4)
        '''
        learning deep neural network: feature learning
        '''
        sample_label = database_labels.index_select(
            0, torch.from_numpy(np.array(select_index)))
        Sim = calc_sim(sample_label)

        # np.float was removed in NumPy 1.20+; plain float is equivalent
        U = np.zeros((num_samples, code_length), dtype=float)
        L = np.zeros((num_samples, code_length), dtype=float)
        I = np.zeros((num_samples, code_length), dtype=float)

        for epoch in range(epochs):
            for iteration, (train_input, train_label,
                            batch_ind) in enumerate(trainloader):
                batch_size_ = train_label.size(0)
                u_ind = np.linspace(iteration * batch_size,
                                    np.min((num_samples,
                                            (iteration + 1) * batch_size)) - 1,
                                    batch_size_,
                                    dtype=int)
                train_input = Variable(train_input.cuda())

                output = model(train_input)
                outputL = labelNet(train_label.type(torch.FloatTensor).cuda())
                S = calc_sim(train_label)
                U[u_ind, :] = output.cpu().data.numpy()
                L[u_ind, :] = outputL.cpu().data.numpy()
                # outer product of label-net and image codes feeds the product net
                semanCode = outputL.clone().detach().requires_grad_(True)
                imgCode = output.clone().detach().requires_grad_(True)
                product = torch.einsum('bi,bj->bij', semanCode, imgCode)
                product = product.reshape(batch_size_,
                                          code_length * code_length)

                hashcode, classify = product_model(product.cuda())
                I[u_ind, :] = hashcode.cpu().data.numpy()

                model.zero_grad()
                labelNet.zero_grad()
                product_model.zero_grad()

                loss3 = product_loss(hashcode, V, S,
                                     V[batch_ind.cpu().numpy(), :], classify,
                                     train_label, imgCode, semanCode)
                loss2 = label_loss(output, V, S, V[batch_ind.cpu().numpy(), :])

                loss = DCDH_loss(output, V, S, V[batch_ind.cpu().numpy(), :]
                                 ) + opt.lamda * loss2 + opt.mu * loss3

                loss.backward()
                optimizer.step()
                optimizer2.step()
                optimizer3.step()

        adjusting_learning_rate(optimizer, iter)
        '''
        learning binary codes: discrete coding
        '''
        Q = -2 * code_length * Sim.cpu().numpy().transpose().dot(
            U) - 2 * gamma * U

        for k in range(code_length):
            sel_ind = np.setdiff1d([ii for ii in range(code_length)], k)
            V_ = V[:, sel_ind]
            V_ = V_[select_index, :]
            Uk = U[:, k]
            U_ = U[:, sel_ind]

            V[select_index,
              k] = -np.sign((Q[:, k] + 2 * V_.dot(U_.transpose().dot(Uk))) +
                            opt.lamda * L[:, k] + opt.mu * I[:, k])

        iter_time = time.time() - iter_time
        loss_ = calc_loss(V, U,
                          Sim.cpu().numpy(), code_length, select_index, gamma)
        logger.info('[Iteration: %3d/%3d][Train Loss: %.4f]', iter, max_iter,
                    loss_)
        record['train loss'].append(loss_)
        record['iter time'].append(iter_time)
        '''
        training procedure finishes, evaluation
        '''
        if iter % 10 == 9:
            model.eval()
            testloader = DataLoader(dset_test,
                                    batch_size=1,
                                    shuffle=False,
                                    num_workers=4)
            qB = encode(model, testloader, num_test, code_length)
            rB = V
            map = calc_hr.calc_map(qB, rB, test_labels.numpy(),
                                   database_labels.numpy())
            topkmap = calc_hr.calc_topMap(qB, rB, test_labels.numpy(),
                                          database_labels.numpy(),
                                          record['param']['topk'])
            logger.info('[Evaluation: mAP: %.4f, top-%d mAP: %.4f]', map,
                        record['param']['topk'], topkmap)
            record['rB'] = rB
            record['qB'] = qB
            record['map'] = map
            record['topkmap'] = topkmap
            filename = os.path.join(logdir,
                                    str(code_length) + 'bits-record.pkl')

            _save_record(record, filename)
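
calc_sim, used throughout Examples #3, #4 and #8, is likewise assumed rather than defined here. A sketch under the usual deep-hashing affinity convention, +1 when two items share at least one label and -1 otherwise; this is inferred from the surrounding code, not the project's confirmed definition:

import torch

def calc_sim(label_1, label_2=None):
    # Pairwise similarity in {-1, +1} from (multi-)one-hot label tensors.
    if label_2 is None:
        label_2 = label_1
    sim = (label_1.float().mm(label_2.float().t()) > 0).float()
    return sim * 2 - 1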
Example #4
def adsh_algo(code_length):
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)
    '''
    parameter setting
    '''
    max_iter = opt.max_iter
    epochs = opt.epochs
    batch_size = opt.batch_size
    learning_rate = opt.learning_rate
    weight_decay = 5 * 10**-4
    num_samples = opt.num_samples
    alpha = opt.alpha
    gamma = opt.gamma
    lamda = opt.lamda
    model_save_path = opt.model_save_path

    record['param']['opt'] = opt
    record['param']['description'] = '[Comment: learning rate decay]'
    logger.info(opt)
    logger.info(code_length)
    logger.info(record['param']['description'])
    '''
    dataset preprocessing
    '''
    nums, dsets, labels = _dataset()
    num_database, num_test = nums
    dset_database, dset_test = dsets
    database_labels, test_labels = labels
    '''
    model construction
    '''
    model = cnn_model.CNNNet(opt.arch, code_length)
    model.cuda()
    adsh_loss = al.ADSHLoss(alpha, gamma, lamda, code_length, num_database)
    optimizer = optim.SGD(model.parameters(),
                          lr=learning_rate,
                          weight_decay=weight_decay)

    V = np.zeros((num_database, code_length))

    model.train()

    for iter in range(max_iter):
        iter_time = time.time()
        '''
        sampling and construct similarity matrix
        '''
        select_index = list(np.random.permutation(
            range(num_database)))[0:num_samples]
        _sampler = subsetsampler.SubsetSampler(select_index)
        trainloader = DataLoader(dset_database,
                                 batch_size=batch_size,
                                 sampler=_sampler,
                                 shuffle=False,
                                 num_workers=4)
        '''
        learning deep neural network: feature learning
        '''
        sample_label = database_labels.index_select(
            0, torch.from_numpy(np.array(select_index)))
        Sim = calc_sim(sample_label, database_labels)
        U = np.zeros((num_samples, code_length), dtype=float)  # np.float removed in NumPy 1.20+
        for epoch in range(epochs):
            for iteration, (train_input, train_label,
                            batch_ind) in enumerate(trainloader):
                batch_size_ = train_label.size(0)
                u_ind = np.linspace(iteration * batch_size,
                                    np.min((num_samples,
                                            (iteration + 1) * batch_size)) - 1,
                                    batch_size_,
                                    dtype=int)
                # u_ind covers one batch_size worth of sampled indices
                train_input = Variable(train_input.cuda())

                output = model(train_input)
                S = Sim.index_select(0, torch.from_numpy(u_ind))
                S_query = calc_sim(sample_label[u_ind, :],
                                   sample_label[u_ind, :])
                U[u_ind, :] = output.cpu().data.numpy()

                model.zero_grad()
                loss = adsh_loss(output, V, S, S_query,
                                 V[batch_ind.cpu().numpy(), :], 1)
                loss.backward()
                optimizer.step()

        # print(optimizer.state_dict())
        adjusting_learning_rate(optimizer, iter)
        '''
        learning binary codes: discrete coding
        '''

        barU = np.zeros((num_database, code_length))
        barU[select_index, :] = U
        Q = -2 * code_length * Sim.cpu().numpy().transpose().dot(
            U) - 2 * gamma * barU
        for k in range(code_length):
            sel_ind = np.setdiff1d([ii for ii in range(code_length)], k)
            V_ = V[:, sel_ind]
            Uk = U[:, k]
            U_ = U[:, sel_ind]
            V[:, k] = -np.sign(Q[:, k] + 2 * V_.dot(U_.transpose().dot(Uk)))
        iter_time = time.time() - iter_time
        S_query = calc_sim(sample_label, sample_label)
        square_loss_, quanty_loss_, triplet_loss_, loss_ = calc_loss(
            V, U,
            Sim.cpu().numpy(), code_length, select_index, gamma, lamda)
        logger.info(
            '[Iteration: %3d/%3d][square Loss: %.4f][quanty Loss: %.4f][triplet Loss: %.4f][train Loss: %.4f]',
            iter, max_iter, square_loss_, quanty_loss_, triplet_loss_, loss_)
        record['train loss'].append(loss_)
        record['iter time'].append(iter_time)
    '''
    training procedure finishes, evaluation
    '''

    torch.save(model, model_save_path)
    print "model saved!"

    model.eval()
    testloader = DataLoader(dset_test,
                            batch_size=1,
                            shuffle=False,
                            num_workers=4)
    qB = encode(model, testloader, num_test, code_length)
    rB = V
    # evaluate ranking quality (mAP)
    map = calc_hr.calc_map(qB, rB, test_labels.numpy(),
                           database_labels.numpy())
    logger.info('[Evaluation: mAP: %.4f]', map)
    record['rB'] = rB
    record['qB'] = qB
    record['map'] = map
    filename = os.path.join(logdir, str(code_length) + 'bits-record.pkl')

    _save_record(record, filename)
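
adsh_algo reads a module-level opt namespace and record dict that the snippet does not set up. A hedged usage sketch; the fields mirror the attributes referenced above, while the default values are placeholders rather than the paper's settings:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--gpu', default='0')
parser.add_argument('--arch', default='resnet50')
parser.add_argument('--max-iter', type=int, default=50)
parser.add_argument('--epochs', type=int, default=3)
parser.add_argument('--batch-size', type=int, default=64)
parser.add_argument('--learning-rate', type=float, default=0.001)
parser.add_argument('--num-samples', type=int, default=2000)
parser.add_argument('--alpha', type=float, default=1.0)
parser.add_argument('--gamma', type=float, default=200.0)
parser.add_argument('--lamda', type=float, default=1.0)
parser.add_argument('--model-save-path', default='dict/adsh-48bits.pth')
opt = parser.parse_args()

record = {'param': {}, 'train loss': [], 'iter time': []}
adsh_algo(48)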
Example #5
def create_retrieval_result_fig(model_name, dataset_name):
    if model_name == 'adch':
        result_dir = '-'.join([
            'result/result-ADCH', dataset_name,
            datetime.now().strftime("%y-%m-%d-%H-%M-%S")
        ])
    else:
        result_dir = '-'.join([
            'result/result-ADSH', dataset_name,
            datetime.now().strftime("%y-%m-%d-%H-%M-%S")
        ])
    os.mkdir(result_dir)
    model = cnn_model.CNNNet('resnet50', 48)
    dimg_paths = []
    timg_paths = []
    timg_ori_paths = []
    if dataset_name == 'CIFAR-10':
        rLfile = open('data/' + dataset_name + '/database_label.txt')
        qLfile = open('data/' + dataset_name + '/test_label.txt')
        rlabels = [int(x.strip()) for x in rLfile]
        qlabels = [int(x.strip()) for x in qLfile]
        rL = np.zeros([len(rlabels), 10])
        for i in range(len(rlabels)):
            rL[i, rlabels[i]] = 1
        qL = np.zeros([len(qlabels), 10])
        for i in range(len(qlabels)):
            qL[i, qlabels[i]] = 1
        rLfile.close()
        qLfile.close()
    elif dataset_name == 'NUS-WIDE':
        rL = np.loadtxt('data/NUS-WIDE/database_label.txt', dtype=np.int64)
        qL = np.loadtxt('data/NUS-WIDE/test_label.txt', dtype=np.int64)
    if dataset_name == 'Project':
        dfile = open('data/' + dataset_name + '/database_img.txt',
                     'r',
                     encoding='utf-8')
        tfile = open('data/' + dataset_name + '/test_img.txt',
                     'r',
                     encoding='utf-8')
        tfile_ori = open('data/' + dataset_name + '/test_img_bak.txt',
                         'r',
                         encoding='utf-8')
    else:
        dfile = open('data/' + dataset_name + '/database_img.txt', 'r')
        tfile = open('data/' + dataset_name + '/test_img.txt', 'r')
        tfile_ori = open('data/' + dataset_name + '/test_img_bak.txt', 'r')
    if model_name == 'adch':
        if dataset_name == 'CIFAR-10':
            model.load_state_dict(
                torch.load('dict/adch-cifar10-48bits.pth',
                           map_location=torch.device('cpu')))
        else:
            model.load_state_dict(
                torch.load('dict/adch-nuswide-48bits.pth',
                           map_location=torch.device('cpu')))
    else:
        if dataset_name == 'CIFAR-10':
            model.load_state_dict(
                torch.load('dict/adsh-cifar10-48bits.pth',
                           map_location=torch.device('cpu')))
        else:
            model.load_state_dict(
                torch.load('dict/adsh-nuswide-48bits.pth',
                           map_location=torch.device('cpu')))
    for line in dfile.readlines():
        dimg_paths.append(line.strip())
    for line in tfile.readlines():
        timg_paths.append(line.strip())
    for line in tfile_ori.readlines():
        timg_ori_paths.append(line.strip())
    dfile.close()
    tfile.close()
    tfile_ori.close()
    tind = []
    for i in range(5):
        tind.append(timg_ori_paths.index(timg_paths[i]))
    model.eval()
    dset_test = create_dataset(dataset_name)
    testloader = DataLoader(dset_test,
                            batch_size=1,
                            shuffle=False,
                            num_workers=4)
    if dataset_name == 'CIFAR-10':
        f = open('dcodes/adch-cifar10-48bits-record.pkl', 'rb')
    elif dataset_name == 'NUS-WIDE':
        f = open('dcodes/adch-nuswide-48bits-record.pkl', 'rb')
    else:
        f = open('dcodes/adch-project-48bits-record.pkl', 'rb')
    record = pickle.load(f)
    f.close()
    qB = encode(model, testloader, len(dset_test), 48)
    rB = record['rB']
    qimgs = []
    rimgs = []
    accuracies = []
    for i in range(5):
        # accuracy = []
        # gnd = (np.dot(qL[tind[i], :], rL.transpose()) > 0).astype(np.float32)
        hamm = calc_hamming_dist(qB[i], rB)
        ind = np.argsort(hamm)
        rimg_dir = os.path.join(result_dir,
                                timg_paths[i].split('/')[1].split('.')[0])
        os.mkdir(rimg_dir)
        for j in range(24):
            rimg_path = os.path.join('data/' + dataset_name,
                                     dimg_paths[ind[j]])
            dest_path = os.path.join(rimg_dir,
                                     dimg_paths[ind[j]].split('/')[1])
            shutil.copy(rimg_path, dest_path)
        qimg = Image.open(os.path.join('data/' + dataset_name, timg_paths[i]))
        qimg = qimg.resize((200, 100))
        qimgs.append(qimg)
        for j in range(10):
            # accuracy.append(gnd[ind[j]])
            rimg = Image.open(
                os.path.join('data/' + dataset_name, dimg_paths[ind[j]]))
            rimg = rimg.resize((200, 100))
            rimgs.append(rimg)
        # accuracies.append(accuracy)
    plt.figure(figsize=(24, 5.5))
    for i in range(5):
        plt.subplot(5, 11, i * 11 + 1)
        plt.imshow(qimgs[i])
        plt.axis('off')
        for j in range(10):
            plt.subplot(5, 11, i * 11 + j + 2)
            image = rimgs[i * 10 + j]
            # if accuracies[i][j] < 1:
            #     draw = ImageDraw.Draw(image)
            #     draw.line(
            #         [(2, 4), (2, image.size[1] - 4), (image.size[0] - 3, image.size[1] - 4), (image.size[0] - 3, 4),
            #          (2, 4)], width=4, fill='red')
            plt.imshow(image)
            plt.axis('off')
    plt.subplots_adjust(left=0,
                        top=0.8,
                        right=0.8,
                        bottom=0,
                        wspace=0.02,
                        hspace=0.02)
    plt.savefig(os.path.join(result_dir, 'result'), bbox_inches='tight')
    plt.show()
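
Example #5 ranks database codes with a calc_hamming_dist helper. For K-bit codes in {-1, +1} the Hamming distance has a standard closed form; a sketch assuming that convention:

import numpy as np

def calc_hamming_dist(q, B):
    # dist(q, b) = 0.5 * (K - q . b) for a query code q and a code matrix B.
    K = B.shape[1]
    return 0.5 * (K - q.dot(B.transpose()))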
Example #6
def algo(code_length):
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)
    '''
    parameter setting
    '''
    epochs = opt.epochs
    batch_size = opt.batch_size
    learning_rate = opt.learning_rate
    temp1 = opt.temp1
    temp2 = opt.temp2
    alpha = opt.alpha
    beta = opt.beta
    img_path = opt.img_path

    record['param']['topk'] = 50
    record['param']['topp'] = 100
    record['param']['opt'] = opt
    logger.info(opt)
    logger.info(code_length)
    '''
    dataset preprocessing
    '''
    nums, dsets, labels = _dataset(img_path)
    num_database, num_test = nums
    dset_database, dset_test = dsets
    database_labels, test_labels = labels
    '''
    model construction
    '''
    model = cnn_model.CNNNet(temp1, code_length, opt.n_class)
    model.cuda()
    proploss = al.proposed(alpha, code_length, temp1, temp2)
    optimizer = optim.SGD(model.parameters(),
                          lr=learning_rate,
                          weight_decay=beta)
    trainloader = DataLoader(dset_database,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=4,
                             pin_memory=True)
    model.train()
    for epoch in range(epochs):
        iter_time = time.time()
        for iteration, (train_input, train_label,
                        batch_ind) in enumerate(trainloader):
            train_input = Variable(train_input.cuda(non_blocking=True))
            train_label = Variable(train_label.float().cuda(non_blocking=True),
                                   requires_grad=False)
            output, z, m = model(train_input)
            model.zero_grad()
            loss = proploss(output, z, m, train_label)
            loss.backward()
            optimizer.step()
        '''
        learning binary codes: discrete coding
        '''
        iter_time = time.time() - iter_time
        loss_ = loss.cpu().data.numpy()
        logger.info('[Iteration: %3d/%3d][Train Loss: %.4f][Time: %.3f secs]',
                    epoch + 1, epochs, loss_, iter_time)
        record['train loss'].append(loss_)
        record['iter time'].append(iter_time)

    record['model'] = model
    '''
    training procedure finishes, evaluation
    '''
    model = cnn_model.CNNExtractNet(model)
    model.eval()
    trainloader = DataLoader(dset_database,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=4,
                             pin_memory=True)
    testloader = DataLoader(dset_test,
                            batch_size=1,
                            shuffle=False,
                            num_workers=4,
                            pin_memory=True)
    qB = encode(model, testloader, num_test, code_length)
    rB = encode(model, trainloader, num_database, code_length)
    mapka, mapkb, topp_ndcg, topp_acg = calc_hr.calc_metrics(
        qB, rB, test_labels.numpy(), database_labels.numpy(),
        record['param']['topk'], record['param']['topp'],
        record['param']['topp'])
    logger.info(
        '[Evaluation: mAP@%d (A): %.4f, mAP@%d (B): %.4f, top-%d NDCG: %.4f, top-%d ACG: %.4f]',
        record['param']['topk'], mapka, record['param']['topk'], mapkb,
        record['param']['topp'], topp_ndcg, record['param']['topp'], topp_acg)
    record['rB'] = rB
    record['qB'] = qB
    record['mapka'] = mapka
    record['mapkb'] = mapkb
    record['topp_ndcg'] = topp_ndcg
    record['topp_acg'] = topp_acg
    filename = os.path.join(logdir, str(code_length) + 'bits-record.pkl')

    _save_record(record, filename)
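
All of these examples report retrieval quality through calc_hr.calc_map. A simplified sketch of what that function presumably computes, mean average precision over a Hamming ranking with relevance defined by shared labels; the real calc_hr module may handle ties and truncation differently:

import numpy as np

def calc_map(qB, rB, query_labels, retrieval_labels):
    num_query = query_labels.shape[0]
    total = 0.0
    for i in range(num_query):
        # Relevant if the item shares at least one label with the query.
        gnd = (query_labels[i, :].dot(retrieval_labels.transpose()) > 0).astype(np.float32)
        tsum = int(gnd.sum())
        if tsum == 0:
            continue
        hamm = 0.5 * (rB.shape[1] - qB[i, :].dot(rB.transpose()))
        gnd = gnd[np.argsort(hamm)]
        count = np.arange(1, tsum + 1)
        tindex = np.where(gnd == 1)[0] + 1.0
        total += np.mean(count / tindex)
    return total / num_query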
Example #7
import os
from PIL import Image
import pandas as pd
import torch
import torchvision.transforms as transforms
import utils.cnn_model as cnn_model

image_infos = []
image_codes = []
dir_path = "data/Project/images"
model = cnn_model.CNNNet('resnet50', 48)
model.load_state_dict(
    torch.load('dict/adch-nuswide-48bits.pth',
               map_location=torch.device('cpu')))
model.eval()
Image.MAX_IMAGE_PIXELS = 1000000000
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
transformations = transforms.Compose([
    transforms.Resize(256),  # transforms.Scale was removed from torchvision
    transforms.CenterCrop(224),
    transforms.ToTensor(), normalize
])
for img_name in os.listdir(dir_path):
    img_path = os.path.join(dir_path, img_name)
    img = Image.open(img_path)
    width = img.size[0]
    height = img.size[1]
    img_info = [img_name, width, height]
    image_infos.append(img_info)
data = image_infos
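
The image_codes list above is declared but never filled, and pandas is imported without being used. A hedged continuation showing how the loaded model could binarize each listed image, assuming CNNNet('resnet50', 48) maps a 1x3x224x224 batch to a 1x48 output:

with torch.no_grad():
    for img_name, _, _ in image_infos:
        img = Image.open(os.path.join(dir_path, img_name)).convert('RGB')
        tensor = transformations(img).unsqueeze(0)  # 1 x 3 x 224 x 224
        code = torch.sign(model(tensor)).squeeze(0).numpy()
        image_codes.append((img_name, code))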
Example #8
def dchuc_algo(code_length):
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)

    '''
    parameter setting
    '''
    max_iter = opt.max_iter
    epochs = opt.epochs
    batch_size = opt.batch_size
    learning_rate = opt.learning_rate
    learning_rate_txt = opt.learning_rate_txt
    weight_decay = 5 * 10 ** -4
    num_samples = opt.num_samples
    gamma = opt.gamma
    alpha = opt.alpha
    beta = opt.beta
    yita = opt.yita
    mu = opt.mu
    y_dim = opt.y_dim
    path = '/data/nus'

    '''
    dataset preprocessing
    '''
    nums, dsets, labels = _dataset(path)
    num_database, num_test = nums
    dset_database, dset_database_txt, dset_test, dset_test_txt = dsets
    database_labels, test_labels = labels
    n_class = test_labels.size()[1]

    testloader = DataLoader(dset_test, batch_size=1,
                            shuffle=False,
                            num_workers=4)
    testloader_txt = DataLoader(dset_test_txt, batch_size=1,
                                shuffle=False,
                                num_workers=4)
    '''
    model construction
    '''
    model = cnn_model.CNNNet(opt.arch, code_length)
    model.cuda()
    model_txt = txt_model.TxtModule(y_dim, code_length)
    model_txt.cuda()
    adsh_loss = dl.DCHUCLoss(gamma, code_length, num_database, alpha, mu)
    adsh_loss_txt = dl.DCHUCLoss(gamma, code_length, num_database, alpha, mu)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    optimizer_txt = optim.SGD(model_txt.parameters(), lr=learning_rate_txt, weight_decay=weight_decay)

    V = np.zeros((num_database, code_length))
    W = np.random.normal(loc=0.0, scale=0.01, size=(code_length, n_class))
    model.train()

    for iter in range(max_iter):
        '''
        sampling and construct similarity matrix
        '''
        select_index = list(np.random.permutation(range(num_database)))[0: num_samples]
        _sampler = subsetsampler.SubsetSampler(select_index)
        trainloader = DataLoader(dset_database, batch_size=batch_size,
                                 sampler=_sampler,
                                 shuffle=False,
                                 num_workers=4)
        trainloader_txt = DataLoader(dset_database_txt, batch_size=batch_size,
                                     sampler=_sampler,
                                     shuffle=False,
                                     num_workers=4)
        '''
        learning deep neural network: feature learning
        '''
        sample_label = database_labels.index_select(0, torch.from_numpy(np.array(select_index)))
        Sim = calc_sim(sample_label, database_labels)
        S1 = calc_sim(sample_label, sample_label)
        U = np.zeros((num_samples, code_length), dtype=float)  # np.float removed in NumPy 1.20+
        G = np.zeros((num_samples, code_length), dtype=float)
        for epoch in range(epochs):
            for zz in range(1):
                for iteration, (train_input, train_label, batch_ind) in enumerate(trainloader):
                    batch_size_ = train_label.size(0)
                    u_ind = np.linspace(iteration * batch_size, np.min((num_samples, (iteration + 1) * batch_size)) - 1,
                                        batch_size_, dtype=int)
                    train_input = Variable(train_input.cuda())

                    output = model(train_input)
                    S = Sim.index_select(0, torch.from_numpy(u_ind))
                    S_1 = S.index_select(1, torch.from_numpy(u_ind))
                    U[u_ind, :] = output.cpu().data.numpy()

                    model.zero_grad()
                    loss = adsh_loss(output, G[u_ind, :], V, S, S_1, V[batch_ind.cpu().numpy(), :],
                                     Variable(torch.from_numpy(W).type(torch.FloatTensor).cuda()),
                                     Variable(train_label.type(torch.FloatTensor).cuda()))
                    loss.backward()
                    optimizer.step()

            for zz in range(1):
                for iteration, (train_input, train_label, batch_ind) in enumerate(trainloader_txt):
                    batch_size_ = train_label.size(0)
                    u_ind = np.linspace(iteration * batch_size, np.min((num_samples, (iteration + 1) * batch_size)) - 1,
                                         batch_size_, dtype=int)
                    train_input = train_input.unsqueeze(1).unsqueeze(-1).type(torch.FloatTensor)
                    train_input = Variable(train_input.cuda())

                    output = model_txt(train_input)
                    S = Sim.index_select(0, torch.from_numpy(u_ind))
                    S_1 = S.index_select(1, torch.from_numpy(u_ind))
                    G[u_ind, :] = output.cpu().data.numpy()

                    model_txt.zero_grad()
                    loss = adsh_loss_txt(output, U[u_ind, :], V, S, S_1, V[batch_ind.cpu().numpy(), :],
                                         Variable(torch.from_numpy(W).type(torch.FloatTensor).cuda()),
                                         Variable(train_label.type(torch.FloatTensor).cuda()))
                    loss.backward()
                    optimizer_txt.step()
        adjusting_learning_rate(optimizer, iter)
        adjusting_learning_rate(optimizer_txt, iter)
        '''
        learning binary codes: discrete coding
        '''
        barU = np.zeros((num_database, code_length))
        barG = np.zeros((num_database, code_length))
        barU[select_index, :] = U
        barG[select_index, :] = G
        Q = -2 * code_length * Sim.cpu().numpy().transpose().dot(U + G) - gamma * (barU + barG)\
            - 2 * beta * database_labels.numpy().dot(W.transpose())
        for k in range(code_length):
            sel_ind = np.setdiff1d([ii for ii in range(code_length)], k)
            V_ = V[:, sel_ind]
            W_ = W.transpose()[:, sel_ind]
            Wk = W.transpose()[:, k]
            Uk = U[:, k]
            Gk = G[:, k]
            U_ = U[:, sel_ind]
            G_ = G[:, sel_ind]
            V[:, k] = -np.sign(Q[:, k] + 2 * V_.dot(U_.transpose().dot(Uk) + 2 * G_.transpose().dot(Gk)
                                                    + beta * 2 * W_.transpose().dot(Wk)))

        I = np.eye(code_length)
        # np.matrix is deprecated; plain ndarrays give identical results here
        P = alpha * U.transpose().dot(U) + alpha * G.transpose().dot(G)\
            + beta * V.transpose().dot(V) + yita * I
        PN = np.linalg.pinv(P)
        BL = (alpha * barU + alpha * barG + beta * V).transpose().dot(database_labels.numpy())
        W = PN.dot(BL)

        lossx = calc_loss(W, sample_label, V, U, G, Sim.cpu().numpy(), S1.cpu().numpy(), code_length, select_index, gamma, yita, alpha, mu)

        print('[Iteration: %3d/%3d][Train Loss: %.4f]' % (iter, max_iter, lossx))

    '''
    training procedure finishes, evaluation
    '''

    model.eval()
    model_txt.eval()

    qB = encode(model, testloader, num_test, code_length)
    qB_txt = encode(model_txt, testloader_txt, num_test, code_length, istxt=True)
    rB = V
    map = calc_hr.calc_map(qB, rB, test_labels.numpy(), database_labels.numpy())
    map_txt2img = calc_hr.calc_map(qB_txt, rB, test_labels.numpy(), database_labels.numpy())
    print('[Evaluation: mAP_i2t: %.4f, mAP_txt2img: %.4f]' % (map, map_txt2img))
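
Finally, the sampling step in Examples #3, #4 and #8 relies on subsetsampler.SubsetSampler. A sketch of what that class likely is, assuming it preserves index order, which the u_ind bookkeeping in the training loops depends on (torch's built-in SubsetRandomSampler would permute the indices and break that mapping):

from torch.utils.data.sampler import Sampler

class SubsetSampler(Sampler):
    # Yield a fixed list of dataset indices in the given order.
    def __init__(self, indices):
        self.indices = indices

    def __iter__(self):
        return iter(self.indices)

    def __len__(self):
        return len(self.indices)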