def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
    """Run seeded graph matching between the two input embeddings.

    Rows of ``self._reference`` whose ``'match'`` column is truthy (after an
    int cast) are used as seeds. For each seed row, the value in the second
    reference column is looked up in the first column of ``self._inputs_1``
    and the value in the third reference column is looked up in the first
    column of ``self._inputs_2``; the positions of the first hits become the
    seed indices handed to ``GraphMatch``.

    The result of ``GraphMatch.fit_predict`` is stored on ``self._match`` and
    ``self._fitted`` is set, so later calls return immediately.

    Parameters
    ----------
    timeout : float, optional
        Not used by this implementation.
    iterations : int, optional
        Not used by this implementation.

    Returns
    -------
    CallResult[None]

    Raises
    ------
    IndexError
        If a seed name does not occur in the corresponding input's first
        column.
    """
    if self._fitted:
        return CallResult(None)

    xhat = self._inputs_1
    yhat = self._inputs_2

    seeds = self._reference['match'].astype(int).astype(bool)

    reference_columns = self._reference.columns
    xhat_seed_names = self._reference[reference_columns[1]][seeds].values
    yhat_seed_names = self._reference[reference_columns[2]][seeds].values

    # Hoisted out of the loop: the identifier columns do not change per seed.
    xhat_ids = xhat[xhat.columns[0]]
    yhat_ids = yhat[yhat.columns[0]]

    # Seeds are positional indices, so keep them in integer arrays rather
    # than the float arrays np.zeros() produces by default.
    n_seeds = len(xhat_seed_names)
    x_seeds = np.zeros(n_seeds, dtype=int)
    y_seeds = np.zeros(n_seeds, dtype=int)
    for i, (x_name, y_name) in enumerate(zip(xhat_seed_names, yhat_seed_names)):
        # First position whose identifier equals the seed name.
        x_seeds[i] = np.where(xhat_ids == x_name)[0][0]
        y_seeds[i] = np.where(yhat_ids == y_name)[0][0]

    # TODO: do this more carefully. Everything after the first column is
    # assumed to be numeric embedding coordinates -- confirm against callers.
    xhat_embedding = xhat.values[:, 1:].astype(np.float32)
    yhat_embedding = yhat.values[:, 1:].astype(np.float32)

    # Pairwise similarity within each embedding: exp(-distance), using
    # cdist's default metric.
    S_xx = np.exp(-cdist(xhat_embedding, xhat_embedding))
    S_yy = np.exp(-cdist(yhat_embedding, yhat_embedding))

    gmp = GraphMatch(shuffle_input=False)
    match = gmp.fit_predict(S_xx, S_yy, x_seeds, y_seeds)
    self._match = container.ndarray(match)
    self._fitted = True

    return CallResult(None)
Exemplo n.º 2
0
    def test_padding(self):
        """Matching a graph to a two-node-smaller copy keeps >= 95% of nodes fixed."""
        n_nodes = 50
        edge_prob = 0.4

        full_graph = er_np(n=n_nodes, p=edge_prob)
        # Drop the last two rows and columns so the graphs differ in size.
        smaller_graph = full_graph[:-2, :-2]

        matcher = GMP(padding="adopted")
        fitted = matcher.fit(full_graph, smaller_graph)

        n_fixed = sum(fitted.perm_inds_ == np.arange(n_nodes))
        self.assertTrue(0.95 <= (n_fixed / n_nodes))
Exemplo n.º 3
0
    def test_padding(self):
        """Matching a graph to a one-node-smaller copy recovers the identity."""
        n_nodes = 50
        edge_prob = 0.4

        # Seed NumPy's global RNG before the graph is sampled.
        np.random.seed(1)
        full_graph = er_np(n=n_nodes, p=edge_prob)
        # Keep the first n - 1 rows and columns, i.e. drop the last node.
        keep = n_nodes - 1
        smaller_graph = full_graph[:keep, :keep]

        matcher = GMP(padding="adopted")
        fitted = matcher.fit(full_graph, smaller_graph)

        fixed_fraction = sum(fitted.perm_inds_ == np.arange(n_nodes)) / n_nodes
        assert fixed_fraction == 1.0
Exemplo n.º 4
0
def rank_graph_match_flow(A, n_init=100, max_iter=50, eps=1e-5, **kwargs):
    """Order the nodes of ``A`` by graph matching to an upper-triangular template.

    ``A`` is matched against a matrix that is 1 strictly above the diagonal
    and 0 elsewhere. The matching is initialized from the permutation
    returned by ``rank_signal_flow(A)``.

    Parameters
    ----------
    A : square array-like
        Adjacency matrix to rank; ``len(A)`` is used as the number of nodes.
    n_init, max_iter, eps
        Forwarded to ``GraphMatch``.
    **kwargs
        Additional keyword arguments forwarded to ``GraphMatch``.

    Returns
    -------
    The value returned by ``GraphMatch.fit_predict(template, A)``.
    """
    n = len(A)

    # Rows of the identity reordered by the signal-flow permutation give the
    # starting permutation matrix.
    initial_perm = rank_signal_flow(A)
    init = np.eye(n)[initial_perm]

    # Template: ones strictly above the diagonal, zeros elsewhere.
    match_mat = np.zeros((n, n))
    triu_inds = np.triu_indices(n, k=1)
    match_mat[triu_inds] = 1

    # Previously the computed `init` was discarded and the matcher always
    # started from "barycenter"; pass it through so it is actually used.
    gm = GraphMatch(n_init=n_init,
                    max_iter=max_iter,
                    init=init,
                    eps=eps,
                    **kwargs)
    perm_inds = gm.fit_predict(match_mat, A)
    return perm_inds
Exemplo n.º 5
0
    def test_custom_init(self):
        """A custom permutation-matrix init is returned unchanged with the expected score."""
        A, B = self._get_AB()
        n_nodes = len(A)

        # The permutation is written 1-based; subtract one per entry to get
        # 0-based indices.
        expected_perm = (
            np.array([7, 5, 1, 3, 10, 4, 8, 6, 9, 11, 2, 12])
            - np.ones(n_nodes, dtype=int)
        )
        # Identity rows reordered by the permutation.
        init_matrix = np.eye(n_nodes)[expected_perm]

        matcher = GMP(
            n_init=1,
            init=init_matrix,
            max_iter=30,
            shuffle_input=True,
            gmp=False,
        )
        matcher.fit(A, B)

        self.assertTrue((matcher.perm_inds_ == expected_perm).all())
        self.assertEqual(matcher.score_, 11156)
Exemplo n.º 6
0
def rank_graph_match_flow(A, n_init=10, max_iter=30, eps=1e-4, **kwargs):
    """Order the nodes of ``A`` by graph matching to an upper-triangular template.

    The matching starts from the permutation given by ``rank_signal_flow(A)``.
    If that call raises ``np.linalg.LinAlgError``, a message is printed and a
    uniform ``1 / n`` matrix is used as the starting point instead.

    ``n_init``, ``max_iter``, ``eps`` and any extra keyword arguments are
    forwarded to ``GraphMatch``. Returns the value of
    ``GraphMatch.fit_predict(template, A)``.
    """
    n = len(A)

    try:
        signal_flow_perm = rank_signal_flow(A)
    except np.linalg.LinAlgError:
        print("SVD did not converge in signal flow")
        start = np.full((n, n), 1 / n)
    else:
        # Identity rows reordered by the signal-flow permutation.
        start = np.eye(n)[signal_flow_perm]

    # Template: ones strictly above the diagonal, zeros elsewhere.
    template = np.triu(np.ones((n, n)), k=1)

    matcher = GraphMatch(
        n_init=n_init, max_iter=max_iter, init=start, eps=eps, **kwargs
    )
    return matcher.fit_predict(template, A)
Exemplo n.º 7
0
    def test_custom_init_seeds(self):
        """A custom init over the non-seed nodes plus one seed recovers the full permutation."""
        A, B = self._get_AB()
        n_unseeded = len(A) - 1

        # Full expected permutation, converted from 1-based to 0-based.
        expected_perm = np.array([7, 5, 1, 3, 10, 4, 8, 6, 9, 11, 2, 12]) - 1

        # Same permutation without its first entry (the seeded pair), 0-based.
        unseeded_perm = np.array([5, 1, 3, 10, 4, 8, 6, 9, 11, 2, 12]) - 1
        # Entries above 6 shift down by one, presumably because the seeded
        # node 6 of B is removed from the unseeded index space.
        unseeded_perm = np.where(unseeded_perm > 6, unseeded_perm - 1, unseeded_perm)

        init_matrix = np.eye(n_unseeded)[unseeded_perm]

        matcher = GMP(
            n_init=1,
            init=init_matrix,
            max_iter=30,
            shuffle_input=True,
            gmp=False,
        )
        # Seed node 0 of A to node 6 of B (the 7th node, counting from one).
        matcher.fit(A, B, seeds_A=[0], seeds_B=[6])

        self.assertTrue((matcher.perm_inds_ == expected_perm).all())
        self.assertEqual(matcher.score_, 11156)
Exemplo n.º 8
0
# Under a given sorting (permutation) of the adjacency matrix, any game (edge) that is
# an upset will fall in the lower triangle, because a lower-ranked team beat a higher-ranked
# team. We can therefore create a ranking by graph matching the adjacency matrix to a
# flat upper triangular matrix, thereby inducing a sorting/ranking that minimizes the
# number of upsets.
# %%
adj = adjacency_df.values

# Match template: ones strictly above the diagonal, zeros elsewhere, with the
# same shape and dtype as the adjacency matrix.
triu_inds = np.triu_indices(len(adj), k=1)
match_mat = np.triu(np.ones_like(adj), k=1)

# Graph matching; the global NumPy RNG is seeded first.
np.random.seed(8888)
gm = GraphMatch(n_init=500, max_iter=150, eps=1e-6)
gm.fit(match_mat, adj)
perm_inds = gm.perm_inds_

# Apply the same permutation to rows and columns.
adj_matched = adj[np.ix_(perm_inds, perm_inds)]
# Swapping the (row, col) arrays of the upper-triangle indices addresses the
# strictly lower triangle, which is where the upsets fall.
upsets = adj_matched[triu_inds[1], triu_inds[0]].sum()
n_games = adj_matched.sum()

print(f"Number of games: {n_games}")
print(f"Number of non-upsets (graph matching score): {gm.score_}")
print(f"Number of upsets: {upsets}")
print(f"Upset ratio: {upsets/n_games}")

print()
print("Ranking:")
print(teams[perm_inds])
Exemplo n.º 9
0
 def test_parallel(self):
     """Two initializations run with ``n_jobs=2`` give a score in [11156, 13500)."""
     A, B = self._get_AB()
     matcher = GMP(gmp=False, n_init=2, n_jobs=2)
     matcher.fit(A, B)
     self.assertTrue(11156 <= matcher.score_ < 13500)
Exemplo n.º 10
0
# Plot the two adjacency matrices (ll_adj and rr_adj) under the title
# "Known alignment". plot_adjs is defined outside this view.
plot_adjs(ll_adj, rr_adj, title="Known alignment")

#%% [markdown]
# ## Evaluate the pairs using graph matching
# Here, we run graph matching either starting from the known pairs (identity matrix as
# the initial position for FAQ) or from the barycenter. The goal is to evaluate how good
# the known pairs are relative to what we'd predict from graph matching.
#%%
# Seed the global NumPy RNG before matching.
np.random.seed(888888)
n = len(ll_adj)

# Time a graph-matching run that starts from the barycenter.
currtime = time.time()
gm = GraphMatch(
    n_init=10,
    init="barycenter",
    max_iter=40,
    shuffle_input=True,
    eps=1e-4,
)
gm.fit(ll_adj, rr_adj)
print(f"{time.time() - currtime:.3f} seconds elapsed to run graph matching.")

# Fraction of nodes that the matching maps to their own index.
perm_inds = gm.perm_inds_
match_ratio = np.mean(perm_inds == np.arange(n))
print("Match ratio when initializing from barycenter:", match_ratio, sep="\n")
print("GMP objective function after initializing from barycenter:", gm.score_, sep="\n")
print()

currtime = time.time()
gm = GraphMatch(n_init=10,
                init=np.eye(n),
    fun = res["fun"]
print(f"{time.time() - currtime:.3f} seconds elapsed.")

# Collect the records in `rows` into a DataFrame. `rows` is built in code not
# visible here; it is expected to provide the "iteration", "fun" and
# "match_ratio" columns plotted below.
results = pd.DataFrame(rows)

# Two stacked panels: "fun" versus iteration on top, "match_ratio" versus
# iteration underneath.
fig, axs = plt.subplots(2, 1, figsize=(8, 8))
sns.lineplot(data=results, x="iteration", y="fun", ax=axs[0])
sns.lineplot(data=results, x="iteration", y="match_ratio", ax=axs[1])

#%% [markdown]
# ## Start graph matching from the known pairs to see if we can improve
#%%

# Graph matching initialized at the identity, i.e. at the known pairing.
gm = GraphMatch(
    n_init=25,
    init=np.eye(n),
    max_iter=100,
    shuffle_input=True,
    eps=1e-4,
)
gm.fit(ll_adj, rr_adj)

# Fraction of nodes that the matching leaves at their own index.
perm_inds = gm.perm_inds_
match_ratio = np.mean(perm_inds == np.arange(n))
print("Match ratio when initializing from known pairs:", match_ratio, sep="\n")
print("GMP objective function after initializing from known pairs:", gm.score_, sep="\n")
# Objective of the unpermuted pairing: sum of the elementwise product.
print("GMP objective function from known pairs:", (ll_adj * rr_adj).sum(), sep="\n")

# %% [markdown]
# ## End
#%%