def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
    """Run seeded graph matching between the two stored inputs.

    Rows of ``self._reference`` whose ``'match'`` value is truthy (after an
    int cast) are treated as seeds.  The names in reference columns 1 and 2
    are looked up in the first column of ``self._inputs_1`` and
    ``self._inputs_2`` to obtain positional seed indices.  The remaining
    columns of each input are used as embeddings, turned into similarity
    matrices ``exp(-cdist(E, E))`` and matched with ``GraphMatch``.

    The result is stored in ``self._match`` and ``self._fitted`` is set.
    Calling ``fit`` again once fitted is a no-op.

    Parameters
    ----------
    timeout : float, optional
        Not used by this implementation.
    iterations : int, optional
        Not used by this implementation.

    Returns
    -------
    CallResult[None]

    Raises
    ------
    IndexError
        If a seed name from the reference table does not occur in the first
        column of the corresponding input.
    """
    if self._fitted:
        return CallResult(None)

    xhat = self._inputs_1
    yhat = self._inputs_2

    # Boolean mask selecting the reference rows that are known matches.
    seeds = self._reference['match'].astype(int).astype(bool)
    xhat_seed_names = self._reference[self._reference.columns[1]][seeds].values
    yhat_seed_names = self._reference[self._reference.columns[2]][seeds].values

    # The id columns do not change between lookups, so fetch them once.
    xhat_ids = xhat[xhat.columns[0]]
    yhat_ids = yhat[yhat.columns[0]]

    # Seeds are row positions, so keep them as integers rather than the
    # float64 array that np.zeros() would produce.
    x_seeds = np.array(
        [np.where(xhat_ids == name)[0][0] for name in xhat_seed_names],
        dtype=int,
    )
    y_seeds = np.array(
        [np.where(yhat_ids == name)[0][0] for name in yhat_seed_names],
        dtype=int,
    )

    # TODO: do this more carefully
    # Everything after the id column is treated as the embedding.
    xhat_embedding = xhat.values[:, 1:].astype(np.float32)
    yhat_embedding = yhat.values[:, 1:].astype(np.float32)

    # Convert pairwise distances into similarities in (0, 1].
    S_xx = np.exp(-cdist(xhat_embedding, xhat_embedding))
    S_yy = np.exp(-cdist(yhat_embedding, yhat_embedding))

    gmp = GraphMatch(shuffle_input=False)
    match = gmp.fit_predict(S_xx, S_yy, x_seeds, y_seeds)

    self._match = container.ndarray(match)
    self._fitted = True

    return CallResult(None)
def test_padding(self):
    """Adopted padding should recover most of the identity alignment when
    the second graph is the first one with its last two nodes removed."""
    n_nodes = 50
    edge_prob = 0.4
    full_graph = er_np(n=n_nodes, p=edge_prob)
    # Drop the last two rows and columns, i.e. remove two nodes.
    truncated_graph = full_graph[:-2, :-2]

    matcher = GMP(padding="adopted")
    fitted = matcher.fit(full_graph, truncated_graph)

    identity = np.arange(n_nodes)
    match_ratio = sum(fitted.perm_inds_ == identity) / n_nodes
    self.assertTrue(match_ratio >= 0.95)
def test_padding(self):
    """With a fixed seed, adopted padding should recover the identity
    alignment exactly when the second graph lacks the last node."""
    n_nodes = 50
    edge_prob = 0.4
    np.random.seed(1)
    full_graph = er_np(n=n_nodes, p=edge_prob)

    # Keep the first n - 1 rows and columns, i.e. remove the last node.
    kept = n_nodes - 1
    truncated_graph = full_graph[:kept, :kept]

    matcher = GMP(padding="adopted")
    fitted = matcher.fit(full_graph, truncated_graph)

    identity = np.arange(n_nodes)
    match_ratio = sum(fitted.perm_inds_ == identity) / n_nodes
    assert match_ratio == 1.0
def rank_graph_match_flow(A, n_init=100, max_iter=50, eps=1e-5, **kwargs):
    """Order the nodes of ``A`` by graph matching it to a strictly
    upper-triangular matrix of ones.

    Parameters
    ----------
    A : square array-like
        Adjacency matrix to be ranked.
    n_init, max_iter, eps
        Forwarded to ``GraphMatch``.
    **kwargs
        Extra keyword arguments forwarded to ``GraphMatch``.

    Returns
    -------
    The result of ``GraphMatch.fit_predict(target, A)``.
    """
    n_nodes = len(A)

    # NOTE(review): a signal-flow based initialisation is built here but is
    # never handed to GraphMatch, which is created with init="barycenter".
    # Kept as-is to preserve behaviour; confirm whether it should be used.
    signal_flow_order = rank_signal_flow(A)
    init = np.eye(n_nodes)[signal_flow_order]

    # Target pattern: ones strictly above the diagonal, zeros elsewhere.
    target = np.triu(np.ones((n_nodes, n_nodes)), k=1)

    matcher = GraphMatch(
        n_init=n_init,
        max_iter=max_iter,
        init="barycenter",
        eps=eps,
        **kwargs,
    )
    return matcher.fit_predict(target, A)
def test_custom_init(self):
    """Starting from a custom permutation-matrix initialisation, the fitted
    permutation should equal that permutation with a score of 11156."""
    A, B = self._get_AB()
    n_nodes = len(A)

    # The permutation is written 1-based; shift it to 0-based indices.
    one_based = np.array([7, 5, 1, 3, 10, 4, 8, 6, 9, 11, 2, 12])
    expected_perm = one_based - np.ones(n_nodes, dtype=int)

    # Row-permuted identity, i.e. the permutation matrix for expected_perm.
    start = np.eye(n_nodes)[expected_perm]

    matcher = GMP(
        n_init=1,
        init=start,
        max_iter=30,
        shuffle_input=True,
        gmp=False,
    )
    matcher.fit(A, B)

    self.assertTrue((matcher.perm_inds_ == expected_perm).all())
    self.assertEqual(matcher.score_, 11156)
def rank_graph_match_flow(A, n_init=10, max_iter=30, eps=1e-4, **kwargs):
    """Order the nodes of ``A`` by graph matching it to a strictly
    upper-triangular matrix of ones, initialised from the signal-flow ranking.

    If ``rank_signal_flow`` raises ``np.linalg.LinAlgError``, a message is
    printed and a uniform ``1 / n`` matrix is used as the initialisation.

    Parameters
    ----------
    A : square array-like
        Adjacency matrix to be ranked.
    n_init, max_iter, eps
        Forwarded to ``GraphMatch``.
    **kwargs
        Extra keyword arguments forwarded to ``GraphMatch``.

    Returns
    -------
    The result of ``GraphMatch.fit_predict(target, A)``.
    """
    n_nodes = len(A)

    try:
        signal_flow_order = rank_signal_flow(A)
    except np.linalg.LinAlgError:
        print("SVD did not converge in signal flow")
        # Fall back to a flat matrix with every entry equal to 1 / n.
        init = np.full((n_nodes, n_nodes), 1 / n_nodes)
    else:
        # Permutation matrix corresponding to the signal-flow ordering.
        init = np.eye(n_nodes)[signal_flow_order]

    # Target pattern: ones strictly above the diagonal, zeros elsewhere.
    target = np.zeros((n_nodes, n_nodes))
    target[np.triu_indices(n_nodes, k=1)] = 1

    matcher = GraphMatch(
        n_init=n_init,
        max_iter=max_iter,
        init=init,
        eps=eps,
        **kwargs,
    )
    return matcher.fit_predict(target, A)
def test_custom_init_seeds(self):
    """Same scenario as the custom-init test, but with node 0 of A seeded
    to node 6 of B and the initialisation given over the remaining nodes."""
    A, B = self._get_AB()
    n_nodes = len(A)

    expected_perm = np.array([7, 5, 1, 3, 10, 4, 8, 6, 9, 11, 2, 12]) - 1

    # Permutation over the non-seed nodes: the first entry of the full
    # permutation is left out and every index above 6 is shifted down by one.
    nonseed_perm = np.array([5, 1, 3, 10, 4, 8, 6, 9, 11, 2, 12]) - 1
    nonseed_perm = np.where(nonseed_perm > 6, nonseed_perm - 1, nonseed_perm)

    # use seed 0 in A to 7 in B (0-based index 6)
    seeds_A = [0]
    seeds_B = [6]

    start = np.eye(n_nodes - 1)[nonseed_perm]

    matcher = GMP(
        n_init=1,
        init=start,
        max_iter=30,
        shuffle_input=True,
        gmp=False,
    )
    matcher.fit(A, B, seeds_A=seeds_A, seeds_B=seeds_B)

    self.assertTrue((matcher.perm_inds_ == expected_perm).all())
    self.assertEqual(matcher.score_, 11156)
# Under a given sorting (permutation) of the adjacency matrix, any game (edge) that is # an upset will fall in the lower triangle, because a lower-ranked team beat a higher-ranked # team. We can therefore create a ranking by graph matching the adjacency matrix to a # flat upper triangular matrix, thereby inducing a sorting/ranking that minimizes the # number of upsets. # %% adj = adjacency_df.values # constructing the match matrix match_mat = np.zeros_like(adj) triu_inds = np.triu_indices(len(match_mat), k=1) match_mat[triu_inds] = 1 # running graph matching np.random.seed(8888) gm = GraphMatch(n_init=500, max_iter=150, eps=1e-6) gm.fit(match_mat, adj) perm_inds = gm.perm_inds_ adj_matched = adj[perm_inds][:, perm_inds] upsets = adj_matched[triu_inds[::-1]].sum() n_games = adj_matched.sum() print(f"Number of games: {n_games}") print(f"Number of non-upsets (graph matching score): {gm.score_}") print(f"Number of upsets: {upsets}") print(f"Upset ratio: {upsets/n_games}") print() print("Ranking:") print(teams[perm_inds])
def test_parallel(self):
    """Fitting with two initialisations across two jobs should give a score
    in the half-open range [11156, 13500)."""
    A, B = self._get_AB()

    matcher = GMP(gmp=False, n_init=2, n_jobs=2)
    matcher.fit(A, B)

    result = matcher.score_
    within_bounds = result >= 11156 and result < 13500
    self.assertTrue(within_bounds)
plot_adjs(ll_adj, rr_adj, title="Known alignment") #%% [markdown] # ## Evaluate the pairs using graph matching # Here, we run graph matching either starting from the known pairs (identity matrix as # the initial position for FAQ) or from the barycenter. The goal is to evaluate how good # the known pairs are relative to what we'd predict from graph matching. #%% np.random.seed(888888) n = len(ll_adj) currtime = time.time() gm = GraphMatch(n_init=10, init="barycenter", max_iter=40, shuffle_input=True, eps=1e-4) gm.fit(ll_adj, rr_adj) print(f"{time.time() - currtime:.3f} seconds elapsed to run graph matching.") perm_inds = gm.perm_inds_ match_ratio = (perm_inds == np.arange(n)).mean() print("Match ratio when initializing from barycenter:") print(match_ratio) print("GMP objective function after initializing from barycenter:") print(gm.score_) print() currtime = time.time() gm = GraphMatch(n_init=10, init=np.eye(n),
fun = res["fun"] print(f"{time.time() - currtime:.3f} seconds elapsed.") results = pd.DataFrame(rows) fig, axs = plt.subplots(2, 1, figsize=(8, 8)) sns.lineplot(data=results, x="iteration", y="fun", ax=axs[0]) sns.lineplot(data=results, x="iteration", y="match_ratio", ax=axs[1]) #%% [markdown] # ## Start graph matching from the known pairs to see if we can improve #%% gm = GraphMatch(n_init=25, init=np.eye(n), max_iter=100, shuffle_input=True, eps=1e-4) gm.fit(ll_adj, rr_adj) perm_inds = gm.perm_inds_ match_ratio = (perm_inds == np.arange(n)).mean() print("Match ratio when initializing from known pairs:") print(match_ratio) print("GMP objective function after initializing from known pairs:") print(gm.score_) print("GMP objective function from known pairs:") print((ll_adj * rr_adj).sum()) # %% [markdown] # ## End #%%