Example #1
    def yield_test_questions_K_edges(self,
                                     resampled=False,
                                     K=1,
                                     subset=False,
                                     expand_outfit=False):
        """
        Yields questions, each of them with their own adj matrix.
        Each node on the question will be expanded to create K edges, so the adj
        matrix will have K*N edges.
        Also, the edges between the nodes of the outfit will be also present (except for the correct choice edges).
        The method to get this edges will be BFS.

        Args:
            resampled: if True, use the resampled version
            K: number of edges to expand for each question node
            subset: if true, use only a subset of the outfit as the query, and
                    use the rest as links to the choices.

        Returns:
            yields questions
        """
        assert K >= 0
        from utils import Graph

        # each question consists of N*4 edges to predict
        # self.questions is a list of questions with N elements and 4 possible choices (answers)
        questions = self.questions if not resampled else self.questions_resampled
        n_nodes = self.test_adj.shape[0]
        for question in questions:
            outfit_ids = []
            choices_ids = []
            gt = []
            valid = []
            # keep only a subset of the outfit
            if subset:
                outfit_subset = np.random.choice(question[0], 3, replace=False)
            else:
                outfit_subset = question[0]
            for index in outfit_subset:  # indexes of outfit nodes
                for i, index_answer in enumerate(question[1]):  # indexes of the candidate answers
                    outfit_ids.append(index)
                    choices_ids.append(index_answer)
                    gt.append(int(i == 0))  # the correct choice is always the first
                    # a link is valid if the candidate item is from the same category as the missing item
                    valid.append(int(question[2][i] == question[3]))

            # question adj with only the outfit edges; built in LIL format for
            # efficient incremental assignment, converted back to CSR at the end
            question_adj = sp.lil_matrix((n_nodes, n_nodes))
            if not expand_outfit:
                # with expand_outfit, intra-outfit edges are instead recovered
                # through the BFS expansion below
                for j, u in enumerate(outfit_subset[:-1]):
                    for v in outfit_subset[j + 1:]:
                        question_adj[u, v] = 1
                        question_adj[v, u] = 1

            if K > 0:
                # the K edges sampled from each node must not belong to the
                # outfit and must not be query edges, so remove both from the
                # available adjacency before running BFS
                available_adj = self.test_adj.copy().tolil()
                for j, u in enumerate(question[0][:-1]):
                    for v in question[0][j + 1:]:
                        available_adj[u, v] = 0
                        available_adj[v, u] = 0
                if expand_outfit:  # activate intra-outfit edges
                    for j, u in enumerate(outfit_subset[:-1]):
                        for v in outfit_subset[j + 1:]:
                            available_adj[u, v] = 1
                            available_adj[v, u] = 1
                for u, v in zip(outfit_ids, choices_ids):
                    available_adj[u, v] = 0
                    available_adj[v, u] = 0
                available_adj = available_adj.tocsr()
                available_adj.eliminate_zeros()

                G = Graph(available_adj)

                extra_edges = []
                # fill the adj matrix with the expanded edges for each node;
                # choices_ids repeats the same 4 candidates for every outfit
                # node, so the first four entries cover all the choices
                nodes_to_expand = choices_ids[:4]

                if expand_outfit:  # expand the outfit items as well
                    nodes_to_expand.extend(outfit_subset)

                for node in nodes_to_expand:
                    edges = G.run_K_BFS(node, K)

                    for edge in edges:
                        u, v = edge
                        question_adj[u, v] = 1
                        question_adj[v, u] = 1
                        extra_edges.append(edge)

            question_adj = question_adj.tocsr()

            yield (question_adj, np.array(outfit_ids), np.array(choices_ids),
                   np.array(gt), np.array(valid))
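
A minimal usage sketch for the generator above (illustrative only: it assumes the method lives on a loader like the DataLoaderFashionGen used in Example #2, and score_edges is a hypothetical stand-in for whatever model call consumes the yielded arrays):

# Hypothetical driver: iterate the test questions and score every
# candidate edge with a previously trained model.
dl = DataLoaderFashionGen()  # assumed loader exposing the method above
for adj, outfit_ids, choices_ids, gt, valid in dl.yield_test_questions_K_edges(
        resampled=False, K=2, subset=False, expand_outfit=True):
    # adj:         per-question adjacency (CSR) with outfit + K BFS-expanded edges
    # outfit_ids:  outfit node ids; choices_ids: candidate node ids (row/col pairs)
    # gt:          1 for the correct candidate edge, 0 otherwise
    # valid:       1 if the candidate shares the missing item's category
    scores = score_edges(model, adj, outfit_ids, choices_ids)  # hypothetical

Building a fresh adjacency per question keeps the correct-choice edges out of the graph the model sees, so the answer cannot leak through message passing.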
Example #2
def test_compatibility(args):
    # expose the args dict entries as attributes via a throwaway namedtuple
    args = namedtuple("Args", args.keys())(*args.values())
    load_from = args.load_from
    config_file = load_from + '/results.json'
    log_file = load_from + '/log.json'

    with open(config_file) as f:
        config = json.load(f)
    with open(log_file) as f:
        log = json.load(f)

    # Dataloader
    DATASET = config['dataset']
    if DATASET == 'polyvore':
        # load dataset
        dl = DataLoaderPolyvore()
        orig_train_features, adj_train, train_labels, train_r_indices, train_c_indices = dl.get_phase(
            'train')
        full_train_adj = dl.train_adj
        orig_val_features, adj_val, val_labels, val_r_indices, val_c_indices = dl.get_phase(
            'valid')
        orig_test_features, adj_test, test_labels, test_r_indices, test_c_indices = dl.get_phase(
            'test')
        full_test_adj = dl.test_adj
        dl.setup_test_compatibility(resampled=args.resampled)
    elif DATASET == 'ssense':
        dl = DataLoaderFashionGen()
        orig_train_features, adj_train, train_labels, train_r_indices, train_c_indices = dl.get_phase(
            'train')
        orig_val_features, adj_val, val_labels, val_r_indices, val_c_indices = dl.get_phase(
            'valid')
        orig_test_features, adj_test, test_labels, test_r_indices, test_c_indices = dl.get_phase(
            'test')
        adj_q, q_r_indices, q_c_indices, q_labels, q_ids, q_valid = \
            dl.get_test_questions()
        full_train_adj = dl.train_adj
        full_test_adj = dl.test_adj
        dl.setup_test_compatibility(resampled=args.resampled)
    else:
        raise NotImplementedError(
            'A data loader for dataset {} does not exist'.format(DATASET))

    NUMCLASSES = 2
    BN_AS_TRAIN = False
    ADJ_SELF_CONNECTIONS = True

    def norm_adj(adj_to_norm):
        return normalize_nonsym_adj(adj_to_norm)

    train_features, mean, std = dl.normalize_features(orig_train_features,
                                                      get_moments=True)
    val_features = dl.normalize_features(orig_val_features, mean=mean, std=std)
    test_features = dl.normalize_features(orig_test_features,
                                          mean=mean,
                                          std=std)

    train_support = get_degree_supports(adj_train,
                                        config['degree'],
                                        adj_self_con=ADJ_SELF_CONNECTIONS)
    val_support = get_degree_supports(adj_val,
                                      config['degree'],
                                      adj_self_con=ADJ_SELF_CONNECTIONS)
    test_support = get_degree_supports(adj_test,
                                       config['degree'],
                                       adj_self_con=ADJ_SELF_CONNECTIONS)

    for i in range(1, len(train_support)):
        train_support[i] = norm_adj(train_support[i])
        val_support[i] = norm_adj(val_support[i])
        test_support[i] = norm_adj(test_support[i])

    num_support = len(train_support)
    placeholders = {
        'row_indices': tf.compat.v1.placeholder(tf.int32, shape=(None,)),
        'col_indices': tf.compat.v1.placeholder(tf.int32, shape=(None,)),
        'dropout': tf.compat.v1.placeholder_with_default(0., shape=()),
        'weight_decay': tf.compat.v1.placeholder_with_default(0., shape=()),
        'is_train': tf.compat.v1.placeholder_with_default(True, shape=()),
        'support': [
            tf.compat.v1.sparse_placeholder(tf.float32, shape=(None, None))
            for sup in range(num_support)
        ],
        'node_features': tf.compat.v1.placeholder(tf.float32, shape=(None, None)),
        'labels': tf.compat.v1.placeholder(tf.float32, shape=(None,))
    }

    model = CompatibilityGAE(placeholders,
                             input_dim=train_features.shape[1],
                             num_classes=NUMCLASSES,
                             num_support=num_support,
                             hidden=config['hidden'],
                             learning_rate=config['learning_rate'],
                             logging=True,
                             batch_norm=config['batch_norm'])

    # Construct feed dicts for train, val and test phases
    train_feed_dict = construct_feed_dict(placeholders, train_features,
                                          train_support, train_labels,
                                          train_r_indices, train_c_indices,
                                          config['dropout'])
    val_feed_dict = construct_feed_dict(placeholders,
                                        val_features,
                                        val_support,
                                        val_labels,
                                        val_r_indices,
                                        val_c_indices,
                                        0.,
                                        is_train=BN_AS_TRAIN)
    test_feed_dict = construct_feed_dict(placeholders,
                                         test_features,
                                         test_support,
                                         test_labels,
                                         test_r_indices,
                                         test_c_indices,
                                         0.,
                                         is_train=BN_AS_TRAIN)

    # Add ops to save and restore all the variables.
    saver = tf.compat.v1.train.Saver()

    def run_eval():  # renamed so it does not shadow the eval builtin
        # control value: if the model restored correctly, this matches the
        # validation metrics stored in the log
        val_avg_loss, val_acc, conf, pred = sess.run(
            [model.loss, model.accuracy, model.confmat,
             model.predict()],
            feed_dict=val_feed_dict)

        print("val_loss=", "{:.5f}".format(val_avg_loss), "val_acc=",
              "{:.5f}".format(val_acc))

    with tf.compat.v1.Session() as sess:
        saver.restore(sess, load_from + '/' + 'best_epoch.ckpt')

        count = 0
        preds = []
        labels = []

        # evaluate the restored model's prediction accuracy
        run_eval()

        prob_act = tf.nn.sigmoid

        K = args.k
        for outfit in dl.comp_outfits:
            before_item = time.time()
            items, score = outfit

            num_new = test_features.shape[0]

            # start with no connections; LIL format supports efficient
            # incremental assignment (converted to CSR below)
            new_adj = sp.lil_matrix((num_new, num_new))

            if args.k > 0:
                # add edges to the adj matrix
                available_adj = dl.test_adj.copy()
                available_adj = available_adj.tolil()

                for i, idx_from in enumerate(items[:-1]):
                    for idx_to in items[i + 1:]:
                        # remove outfit edges, they won't be expanded
                        available_adj[idx_to, idx_from] = 0
                        available_adj[idx_from, idx_to] = 0
                available_adj = available_adj.tocsr()
                available_adj.eliminate_zeros()

            if args.subset:  # use only a subset (of size 3) of the outfit
                # sample without replacement so the subset has no duplicates
                items = np.random.choice(items, 3, replace=False)

            new_features = test_features

            # predict edges between the items
            query_r = []
            query_c = []

            item_indexes = items
            for i, idx_from in enumerate(item_indexes[:-1]):
                for idx_to in item_indexes[i + 1:]:
                    query_r.append(idx_from)
                    query_c.append(idx_to)

            if args.k > 0:
                G = Graph(available_adj)
                nodes_to_expand = np.unique(items)
                for node in nodes_to_expand:
                    edges = G.run_K_BFS(node, K)
                    for edge in edges:
                        u, v = edge
                        new_adj[u, v] = 1
                        new_adj[v, u] = 1

            query_r = np.array(query_r)
            query_c = np.array(query_c)

            new_adj = new_adj.tocsr()

            new_support = get_degree_supports(
                new_adj,
                config['degree'],
                adj_self_con=ADJ_SELF_CONNECTIONS,
                verbose=False)
            for i in range(1, len(new_support)):
                new_support[i] = norm_adj(new_support[i])
            new_support = [sparse_to_tuple(sup) for sup in new_support]

            # train_labels merely fills the labels placeholder; only
            # model.outputs is fetched below, so labels are not needed here
            new_feed_dict = construct_feed_dict(placeholders,
                                                new_features,
                                                new_support,
                                                train_labels,
                                                query_r,
                                                query_c,
                                                0.,
                                                is_train=BN_AS_TRAIN)

            pred = sess.run(prob_act(model.outputs), feed_dict=new_feed_dict)

            predicted_score = pred.mean()
            print("[{}] Mean scores between outfit: {:.4f}, label: {}".format(
                count, predicted_score, score))
            # TODO: remove this print
            print("Total Elapsed: {:.4f}".format(time.time() - before_item))
            count += 1

            preds.append(predicted_score)
            labels.append(score)

        preds = np.array(preds)
        labels = np.array(labels)

        AUC = compute_auc(preds, labels)

        # control value again: should still match the metrics stored in the log
        run_eval()

        print('The AUC compat score is: {}'.format(AUC))

    print('Best val score saved in log: {}'.format(config['best_val_score']))
    print('Last val score saved in log: {}'.format(log['val']['acc'][-1]))

    print("mean positive prediction: {}".format(
        preds[labels.astype(bool)].mean()))
    print("mean negative prediction: {}".format(preds[np.logical_not(
        labels.astype(bool))].mean()))
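
Since test_compatibility rebuilds args as a namedtuple from a plain dict, it can be driven without argparse. A hypothetical invocation (the load_from path is illustrative; the directory must contain the results.json, log.json, and best_epoch.ckpt files read above):

args = {
    'load_from': 'results/run_0',  # illustrative path to a trained run
    'resampled': False,            # if True, use resampled compatibility outfits
    'k': 2,                        # BFS edges expanded per node (K)
    'subset': False,               # if True, score only a 3-item outfit subset
}
test_compatibility(args)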