Example #1
def nDCG5_t2i_atten(q_id_dict, answer, opt):
    # Rank the candidate images of every text query by attention-weighted
    # similarity, then evaluate with nDCG@5.
    score_dict = {k: [] for k in q_id_dict}
    for k, v in q_id_dict.items():
        for e in v:
            p_id = e[0]                  # candidate image id
            im_emb = e[1].unsqueeze(0)   # (1, n_regions, d) image features
            cap_emb = e[2].unsqueeze(0)  # (1, n_words, d) caption features
            cap_len = e[3]               # caption length (unused here)
            im_mask = e[4].unsqueeze(0)  # region padding mask
            # Attend caption words over image regions, then score the query
            # against the attended context (SCAN t2i formulation).
            weiContext, attn = func_attention(cap_emb,
                                              im_emb,
                                              im_mask,
                                              opt,
                                              smooth=opt.lambda_softmax)
            row_sim = cosine_similarity(cap_emb, weiContext,
                                        dim=2).unsqueeze(0)
            row_sim = row_sim.mean(dim=1, keepdim=True)

            score_dict[k].append((row_sim.cpu().detach().numpy()[0][0], p_id))
        # Higher similarity first.
        score_dict[k].sort(key=lambda x: x[0], reverse=True)

    if answer is None:
        save_result(score_dict)
        return None
    else:
        ndcg5 = nDCG5(score_dict, answer)

        return ndcg5
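For context, here is a minimal, self-contained sketch of the attention step this example relies on. `toy_func_attention` is a simplified stand-in for the repo's SCAN-style `func_attention` (the real one also consumes `im_mask` and `opt`), and the tensor sizes are hypothetical:

import torch
import torch.nn.functional as F

def toy_func_attention(cap_emb, im_emb, smooth=9.0):
    # Each caption word attends over the image regions (SCAN-style t2i).
    attn = torch.bmm(cap_emb, im_emb.transpose(1, 2))  # (1, n_words, n_regions)
    attn = F.softmax(attn * smooth, dim=2)             # smooth plays the role of opt.lambda_softmax
    return torch.bmm(attn, im_emb)                     # attended context per word

cap_emb = torch.randn(1, 7, 1024)   # hypothetical: 7 words, 1024-d
im_emb = torch.randn(1, 36, 1024)   # hypothetical: 36 regions
weiContext = toy_func_attention(cap_emb, im_emb)
score = F.cosine_similarity(cap_emb, weiContext, dim=2).mean().item()
print(score)  # scalar query-candidate similarity, as appended to score_dict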
Example #2
def main(args):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model_path = "{}/{}/seed1/checkpoint/model_best.pth.tar".format(
        args.run_folder, args.run)
    out_path = "{}/{}".format(args.out_folder, args.run)

    os.makedirs(out_path, exist_ok=True)

    print("LOADING MODEL")
    # load trained SCAN model
    model, opt = load_model(model_path, device)
    model.val_start()

    print("RETRIEVE VOCAB")
    # load vocabulary used by the model
    vocab = deserialize_vocab("{}/{}/{}_vocab_{}.json".format(
        opt.vocab_path, opt.clothing, opt.data_name, opt.version))
    opt.vocab_size = len(vocab)

    word_attn = attn_per_word(args.list_words, opt, vocab, model)

    # For every query word, compare averaged word embeddings against
    # attention-averaged image features of matching train/test items.
    word_cos = {}
    for word_row in args.list_words:
        dpath = os.path.join(opt.data_path, opt.data_name, opt.clothing)

        loader_test, pos_test = retrieve_loader("test", opt, dpath, word_row,
                                                vocab)
        loader_train, pos_train = retrieve_loader("train", opt, dpath,
                                                  word_row, vocab)

        average_attn = word_attn[word_row]
        img_features = avg_features_img(average_attn, model, loader_train,
                                        loader_test)
        n_image = img_features.shape[0]

        temp_cos = {}
        for word_col in word_attn.keys():
            word_feature = avg_features_word(word_col, model, vocab)
            word_features = word_feature.expand(n_image, -1)
            cosine_scores = cosine_similarity(word_features, img_features)
            temp_cos[word_col] = torch.mean(cosine_scores).item()

        word_cos[word_row] = temp_cos

    print("PLOT ATTENTION")
    write_out(out_path, word_attn, "attention")
    write_table(out_path, word_cos)
    write_fig(out_path, word_cos, args.run)
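A plausible entry point for this script, inferred from the attributes `main` reads off `args`; the actual flag names and defaults in the source repository may differ:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--run_folder", default="runs")    # contains <run>/seed1/checkpoint/...
    parser.add_argument("--run", default="scan_baseline")  # run name (hypothetical default)
    parser.add_argument("--out_folder", default="out")     # where tables/figures are written
    parser.add_argument("--list_words", nargs="+",
                        default=["red", "floral"])         # query words (hypothetical)
    main(parser.parse_args())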
Example #3
def shard_xattn_all(model, images, captions, caplens, opt, shard_size=128):
    # Ceil division; // is required in Python 3 so range() below gets ints.
    n_im_shard = (len(images) - 1) // shard_size + 1
    n_cap_shard = (len(captions) - 1) // shard_size + 1
    alpha = 0.8
    d = np.zeros((len(images), len(captions)))
    label = np.zeros((len(images), len(captions), 500))  # 500-d attention label per pair
    with torch.no_grad():
        for i in range(n_im_shard):
            im_start, im_end = shard_size * i, min(shard_size * (i + 1),
                                                   len(images))
            for j in range(n_cap_shard):
                sys.stdout.write('\r>> shard_xattn_all batch (%d,%d)' % (i, j))
                cap_start, cap_end = shard_size * j, min(
                    shard_size * (j + 1), len(captions))
                im = torch.from_numpy(images[im_start:im_end]).float().cuda()
                s = torch.from_numpy(captions[cap_start:cap_end]).float().cuda()
                l = caplens[cap_start:cap_end]
                # Average the t2i and i2t cross-attention scores.
                sim1, attni = xattn_score_t2i2(im, s, l, opt)
                sim2, attnt = xattn_score_i2t2(im, s, l, opt)
                sims = 0.5 * (sim1 + sim2)
                # Project both attention maps to 500-d label vectors.
                attnt0, attnt1 = attnt.size(0), attnt.size(1)
                attni0, attni1 = attni.size(0), attni.size(1)
                attnt = model.FC1(attnt.view(attnt0 * attnt1, 1024))
                attni = model.FC1(attni.view(attni0 * attni1, 1024))
                attnt = attnt.view(attnt0, attnt1, 500)
                attni = attni.view(attni0, attni1, 500)
                attnt = torch.sigmoid(attnt)
                attni = torch.sigmoid(attni)
                # Blend the raw score with agreement between the two
                # attention-derived label vectors.
                sim_label = cosine_similarity(attni, attnt, 2)
                sim = alpha * sims + (1 - alpha) * sim_label
                d[im_start:im_end, cap_start:cap_end] = sim.data.cpu().numpy()
                label[im_start:im_end,
                      cap_start:cap_end, :] = attni.data.cpu().numpy()
        sys.stdout.write('\n')
    return d, label
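The sharding pattern itself, stripped of the model-specific scoring, looks like this; `shard_pairwise_cosine` is an illustrative stand-in, not part of the repo:

import numpy as np
import torch

def shard_pairwise_cosine(a, b, shard_size=128):
    # Fill a (len(a), len(b)) similarity matrix one block at a time so
    # only one shard pair is ever resident in memory.
    d = np.zeros((len(a), len(b)), dtype=np.float32)
    n_a = (len(a) - 1) // shard_size + 1  # ceil division, as above
    n_b = (len(b) - 1) // shard_size + 1
    with torch.no_grad():
        for i in range(n_a):
            a0, a1 = i * shard_size, min((i + 1) * shard_size, len(a))
            x = torch.from_numpy(a[a0:a1])
            x = x / x.norm(dim=1, keepdim=True)
            for j in range(n_b):
                b0, b1 = j * shard_size, min((j + 1) * shard_size, len(b))
                y = torch.from_numpy(b[b0:b1])
                y = y / y.norm(dim=1, keepdim=True)
                d[a0:a1, b0:b1] = (x @ y.t()).numpy()
    return d

sims = shard_pairwise_cosine(np.random.randn(300, 64).astype(np.float32),
                             np.random.randn(500, 64).astype(np.float32))
print(sims.shape)  # (300, 500)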
Example #4
def nDCG5_t2i_atten_rerank(q_id_dict, answer, opt):
    # Same scoring as nDCG5_t2i_atten, followed by k-reciprocal re-ranking
    # over the candidate gallery of each query.
    score_dict = {k: [] for k in q_id_dict}

    count = 0
    for k, v in q_id_dict.items():
        count += 1
        if count % 20 == 0:
            print(count, '/', len(q_id_dict))
        q_g = []        # query-to-gallery similarities
        g_g = []        # gallery image embeddings
        p_id_list = []  # candidate ids, aligned with q_g
        mask_list = []  # region masks, aligned with g_g
        for e in v:
            # Unpack as in Example #1.
            p_id = e[0]
            im_emb = e[1].unsqueeze(0)
            cap_emb = e[2].unsqueeze(0)
            cap_len = e[3]
            im_mask = e[4].unsqueeze(0)
            weiContext, attn = func_attention(cap_emb,
                                              im_emb,
                                              im_mask,
                                              opt,
                                              smooth=opt.lambda_softmax)
            row_sim = cosine_similarity(cap_emb, weiContext,
                                        dim=2).unsqueeze(0)
            row_sim = row_sim.mean(dim=1, keepdim=True)

            q_g.append(row_sim.cpu().detach().numpy()[0][0])
            g_g.append(im_emb.detach())
            p_id_list.append(p_id)
            mask_list.append(im_mask.detach())

            score_dict[k].append((row_sim.cpu().detach().numpy()[0][0], p_id))

        q_g = np.array([q_g], dtype=np.float32)
        # Map cosine similarity in [-1, 1] to a distance in [0, 1]
        # for re-ranking.
        q_g = 1 - (q_g + 1) / 2.
        g_g_score = np.ones((len(g_g), len(g_g)), dtype=np.float32)
        # Gallery-to-gallery similarities: attend the first caption over each
        # pair of gallery images and compare the attended contexts.
        cap_emb = v[0][2].unsqueeze(0)
        for i in range(1, len(g_g)):
            for j in range(i):
                weiContext1, attn = func_attention(cap_emb,
                                                   g_g[i],
                                                   mask_list[i],
                                                   opt,
                                                   smooth=opt.lambda_softmax)
                weiContext2, attn = func_attention(cap_emb,
                                                   g_g[j],
                                                   mask_list[j],
                                                   opt,
                                                   smooth=opt.lambda_softmax)
                row_sim = cosine_similarity(weiContext1, weiContext2,
                                            dim=2).unsqueeze(0)
                row_sim = row_sim.mean(dim=1, keepdim=False)[0]
                g_g_score[i, j] = row_sim
                g_g_score[j, i] = row_sim

        # Same similarity-to-distance mapping as above; re_ranking returns
        # distances, so sort ascending (lower distance = better match).
        g_g = 1 - (g_g_score + 1) / 2.
        q_q = np.zeros((1, 1), dtype=np.float32)
        a = re_ranking(q_g, q_q, g_g)[0]
        score_dict[k] = [(a[i], p_id_list[i]) for i in range(a.shape[0])]
        score_dict[k].sort(key=lambda x: x[0])
    np.save(opt.score_path, score_dict)

    if answer is None:
        save_result(score_dict)
        return None
    else:
        ndcg5 = nDCG5(score_dict, answer)

        return ndcg5
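Both ranking examples delegate the final metric to `nDCG5`, which is not shown here. A minimal binary-relevance version of nDCG@5 might look like the following; the repo's version may instead use graded relevance taken from `answer`:

import numpy as np

def ndcg_at_5(ranked_ids, relevant_ids):
    # Binary-relevance nDCG@5: DCG of the predicted top 5, normalized by
    # the DCG of an ideal ranking with every relevant item ranked first.
    dcg = sum(1.0 / np.log2(r + 2)
              for r, pid in enumerate(ranked_ids[:5]) if pid in relevant_ids)
    idcg = sum(1.0 / np.log2(r + 2) for r in range(min(len(relevant_ids), 5)))
    return dcg / idcg if idcg > 0 else 0.0

print(ndcg_at_5(["p3", "p1", "p9", "p2", "p7"], {"p1", "p2"}))  # ~0.65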