def test_other_graphs(Gs, signals, lrn, data_state, models_state):
    """Evaluate previously trained models on freshly sampled graph pairs.

    For each of ``Gs['n_graphs']`` perturbed graph pairs, the dataset is
    rebuilt from ``data_state``, each experiment's weights are restored
    from ``models_state`` and the test median error and MSE are recorded.

    Returns:
        Tuple of two (n_graphs, N_EXPS) arrays: median errors and MSEs.
    """
    n_graphs = Gs['n_graphs']
    median_errs = np.zeros((n_graphs, N_EXPS))
    mses = np.zeros((n_graphs, N_EXPS))
    for g in range(n_graphs):
        Gx, Gy = ds.perturbated_graphs(Gs['params'], Gs['create'],
                                       Gs['destroy'], pct=Gs['pct'],
                                       seed=SEED)
        data = ds.LinearDS2GSLinksPert(Gx, Gy, signals['samples'],
                                       signals['L'], signals['deltas'],
                                       median=signals['median'],
                                       same_coeffs=signals['same_coeffs'])
        data.load_state_dict(data_state, unit_norm=True)
        data.add_noise(signals['noise'], test_only=signals['test_only'])
        distance = np.median(np.linalg.norm(data.train_X - data.train_Y,
                                            axis=1))
        print('Distance signals:', distance)
        data.to_tensor()
        # Rebuild each experiment's model and restore its trained weights
        for e, exp in enumerate(EXPS):
            model = create_model(Gx, Gy, exp, lrn)
            model.load_state_dict(models_state[e])
            _, median_errs[g, e], mses[g, e] = model.test(data.test_X,
                                                          data.test_Y)
            print('Graph {}: {}-{} ({}): mse {} - MedianErr: {}'
                  .format(g, e, exp['type'], model.count_params(),
                          mses[g, e], median_errs[g, e]))
    return median_errs, mses
def train_models(Gs, signals, lrn):
    """Sample one perturbed graph pair, fit every experiment on it and
    return the dataset state, the trained model states and both graphs.

    Returns:
        (data_state, models_states, Gx, Gy) where ``models_states`` is a
        list of state dicts, one per entry of EXPS.
    """
    # Create data
    Gx, Gy = ds.perturbated_graphs(Gs['params'], Gs['create'],
                                   Gs['destroy'], pct=Gs['pct'], seed=SEED)
    data = ds.LinearDS2GSLinksPert(Gx, Gy, signals['samples'],
                                   signals['L'], signals['deltas'],
                                   median=signals['median'],
                                   same_coeffs=signals['same_coeffs'])
    data.to_unit_norm()
    data.add_noise(signals['noise'], test_only=signals['test_only'])
    distance = np.median(np.linalg.norm(data.train_X - data.train_Y,
                                        axis=1))
    print('Distance signals:', distance)
    data.to_tensor()
    data_state = data.state_dict()

    models_states = []
    for idx, exp in enumerate(EXPS):
        model = create_model(Gx, Gy, exp, lrn)
        # Fit models
        epochs, _, _ = model.fit(data.train_X, data.train_Y,
                                 data.val_X, data.val_Y)
        _, med_error, mse_error = model.test(data.test_X, data.test_Y)
        models_states.append(model.state_dict())
        print('Original Graph {}-{} ({}): mse {} - MedianErr: {}'
              .format(idx, exp['type'], model.count_params(), mse_error,
                      med_error))
    print()
    return data_state, models_states, Gx, Gy
def test_model(id, signals, nn_params, model_params):
    """Train and evaluate a single GIGO architecture on a perturbed
    graph pair.

    Returns:
        (mse, mean_err, med_err, n_params, t_conv, epochs).
    """
    Gx, Gy = data_sets.perturbated_graphs(signals['g_params'],
                                          signals['eps1'], signals['eps2'],
                                          pct=signals['pct'],
                                          perm=signals['perm'])

    # Define the data model
    data = data_sets.LinearDS2GSLinksPert(
        Gx, Gy,
        signals['N_samples'],
        signals['L_filter'],
        signals['g_params']['k'],  # k is n_delts
        median=signals['median'])
    data.to_unit_norm()
    data.add_noise(signals['noise'], test_only=signals['test_only'])
    data.to_tensor()

    # Build the GIGO architecture on the normalized Laplacians
    Gx.compute_laplacian('normalized')
    Gy.compute_laplacian('normalized')
    archit = GIGOArch(Gx.L.todense(), Gy.L.todense(),
                      nn_params['Fi'], nn_params['Fo'],
                      nn_params['Ki'], nn_params['Ko'],
                      nn_params['C'], nn_params['nonlin'],
                      nn_params['last_act_fn'], nn_params['batch_norm'],
                      nn_params['arch_info'])
    model_params['arch'] = archit
    model = Model(**model_params)

    start = time.time()
    epochs, _, _ = model.fit(data.train_X, data.train_Y,
                             data.val_X, data.val_Y)
    t_conv = time.time() - start
    mean_err, med_err, mse = model.test(data.test_X, data.test_Y)

    print("DONE {}: MSE={} - Mean Err={} - Median Err={} - Params={} - t_conv={} - epochs={}".format(
        id, mse, mean_err, med_err, model.count_params(), round(t_conv, 4), epochs
    ))
    return mse, mean_err, med_err, model.count_params(), t_conv, epochs
def test_graph_reproducibility(self):
    """Perturbation with a fixed seed must always produce the same graphs."""
    Gx, Gy = ds.perturbated_graphs(self.G_params, 10, 10, pct=True,
                                   seed=SEED)
    print('Link x:', Gx.Ne)
    print('Link y:', Gy.Ne)
    self.assertEqual(1664, Gx.Ne)
    self.assertEqual(1664, Gy.Ne)
    # Fraction of links that differ between the two adjacency matrices
    diff_ratio = np.sum(Gx.A != Gy.A)/2/Gx.Ne
    print('Diff links:', diff_ratio)
    self.assertAlmostEqual(0.19951923076923078, diff_ratio)
def test_permute_graph(self):
    """With perm=True and no perturbation, Gy must be an exact node
    permutation of Gx."""
    Gx, Gy = ds.perturbated_graphs(self.G_params, 0, 0, perm=True,
                                   pct=True, seed=SEED)
    adj_x = Gx.W.todense()
    adj_y = Gy.W.todense()
    com_x = Gx.info['node_com']
    com_y = Gy.info['node_com']
    perm = Gy.info['perm_matrix']
    # The raw matrices differ element-wise...
    self.assertFalse(np.array_equal(adj_x, adj_y))
    self.assertFalse(np.array_equal(com_x, com_y))
    # ...the permutation matrix is orthogonal...
    self.assertTrue(np.array_equal(np.eye(Gx.N), perm.dot(perm.T)))
    # ...and undoing the permutation recovers Gx exactly.
    self.assertTrue(np.array_equal(adj_x, perm.T.dot(adj_y).dot(perm)))
    self.assertTrue(np.array_equal(com_x, perm.T.dot(com_y)))
def setUp(self):
    """Seed the RNG and build a default SBM graph pair for the tests."""
    np.random.seed(SEED)
    self.G_params = {
        'type': ds.SBM,
        'N': 32,
        'k': 4,
        'p': 0.8,
        'q': 0.1,
        'type_z': ds.RAND,
    }
    self.eps1 = 5
    self.eps2 = 5
    self.Gx, self.Gy = ds.perturbated_graphs(self.G_params, self.eps1,
                                             self.eps2, seed=SEED)
def test_permutated_S(self):
    """The source matrices on Gy must be the permuted versions of those
    on Gx, in every data split."""
    n_samps = [50, 20, 20]
    L = 6
    n_delts = self.G_params['k']
    Gx, Gy = ds.perturbated_graphs(self.G_params, 0, 0, perm=True,
                                   seed=SEED)
    data = ds.LinearDS2GSLinksPert(Gx, Gy, n_samps, L, n_delts)
    perm = data.Gy.info['perm_matrix']
    self.assertFalse(np.array_equal(data.Hx, data.Hy))
    # Each split: raw sources differ, but agree after undoing the perm
    for Sx, Sy in ((data.train_Sx, data.train_Sy),
                   (data.val_Sx, data.val_Sy),
                   (data.test_Sx, data.test_Sy)):
        self.assertFalse(np.array_equal(Sx, Sy))
        self.assertTrue(np.array_equal(Sx, Sy.dot(perm)))
def test_model(id, signals, nn_params, model_params):
    """Build the requested architecture, train it on signals diffused
    over a perturbed graph pair and report test metrics.

    Args:
        id: experiment identifier, used only for logging.
        signals: dict with graph/signal generation parameters.
        nn_params: dict with 'arch_type' and architecture hyperparameters.
        model_params: kwargs for Model; 'arch' is filled in here.

    Returns:
        (mse, mean_err, med_err, n_params, t_conv, epochs).

    Raises:
        RuntimeError: if nn_params['arch_type'] is not one of 'basic',
            'mlp', 'conv' or 'linear'.
    """
    Gx, Gy = data_sets.perturbated_graphs(signals['g_params'],
                                          signals['eps1'], signals['eps2'],
                                          pct=signals['pct'],
                                          perm=signals['perm'])

    # Define the data model
    data = data_sets.LinearDS2GSLinksPert(
        Gx, Gy,
        signals['N_samples'],
        signals['L_filter'],
        signals['g_params']['k'],  # k is n_delts
        median=signals['median'])
    data.to_unit_norm()
    data.add_noise(signals['noise'], test_only=signals['test_only'])
    data.to_tensor()

    arch_type = nn_params['arch_type']
    if arch_type == "basic":
        Gx.compute_laplacian('normalized')
        archit = BasicArch(Gx.L.todense(), nn_params['F'], nn_params['K'],
                           nn_params['M'], nn_params['nonlin'], ARCH_INFO)
    elif arch_type == "mlp":
        archit = MLP(nn_params['F'], nn_params['nonlin'], ARCH_INFO)
    elif arch_type == "conv":
        # NOTE(review): N is taken from module scope here — confirm it is
        # defined in this script.
        archit = ConvNN(N, nn_params['F'], nn_params['K'],
                        nn_params['nonlin'], nn_params['M'], ARCH_INFO)
    elif arch_type == "linear":
        archit = MLP(nn_params['F'], nn_params['nonlin'], ARCH_INFO)
    else:
        # Bug fix: the message previously omitted 'linear', which the
        # dispatch above accepts.
        raise RuntimeError(
            "arch_type has to be either basic, mlp, conv or linear")

    model_params['arch'] = archit
    model = Model(**model_params)

    t_init = time.time()
    epochs, _, _ = model.fit(data.train_X, data.train_Y,
                             data.val_X, data.val_Y)
    t_conv = time.time() - t_init
    mean_err, med_err, mse = model.test(data.test_X, data.test_Y)

    print(
        "DONE {}: MSE={} - Mean Err={} - Median Err={} - Params={} - t_conv={} - epochs={}"
        .format(id, mse, mean_err, med_err, model.count_params(),
                round(t_conv, 4), epochs),
        flush=True)
    return mse, mean_err, med_err, model.count_params(), t_conv, epochs
def test_percentage_perturbation(self):
    """Percentage-mode perturbation keeps the changed-link ratio within
    +/-2 points of create+destroy, and both graphs stay valid."""
    create = destroy = 5
    upper = (create + destroy + 2)/100
    lower = (create + destroy - 2)/100
    for _ in range(10):
        Gx, Gy = ds.perturbated_graphs(self.G_params, create, destroy,
                                       pct=True, seed=SEED)
        Ax = Gx.W.todense()
        Ay = Gy.W.todense()
        ratio = np.sum(Ax != Ay)/Gx.Ne/2
        print('diff:', ratio)
        # Both graphs must stay undirected, connected and loop-free
        for G, A in ((Gx, Ax), (Gy, Ay)):
            self.assertFalse(G.is_directed())
            self.assertTrue(G.is_connected())
            self.assertEqual(np.sum(np.diag(A)), 0)
        self.assertTrue(ratio <= upper)
        self.assertTrue(ratio >= lower)
def test_S_ER(self):
    """Every sample's nonzero deltas must sum to at most n_delts, for
    each of the train, validation and test splits.

    Bug fix: the second and third loops previously re-checked the
    *train* source matrices instead of the validation and test ones,
    so those splits were never verified.
    """
    n_samps = [50, 20, 20]
    L = 6
    n_delts = 6
    self.G_params['type'] = ds.ER
    Gx, Gy = ds.perturbated_graphs(self.G_params, self.eps1, self.eps2,
                                   seed=SEED)
    data = ds.LinearDS2GS(Gx, Gy, n_samps, L, n_delts)
    data.to_unit_norm()
    self.assertFalse(np.array_equal(data.Hx, data.Hy))
    splits = ((n_samps[0], data.train_Sx, data.train_Sy),
              (n_samps[1], data.val_Sx, data.val_Sy),
              (n_samps[2], data.test_Sx, data.test_Sy))
    for size, Sx, Sy in splits:
        for i in range(size):
            self.assertLessEqual(np.sum(Sx[i, :][Sx[i, :] != 0]), n_delts)
            self.assertLessEqual(np.sum(Sy[i, :][Sy[i, :] != 0]), n_delts)
def test_probability_perturbation(self):
    """Probability-mode perturbation: the mean fraction of changed links
    over several graphs must match the analytical expectation within a
    fixed margin, and every graph must remain valid."""
    create = 0.0005
    destroy = 0.05
    n_graphs = 10
    diff_links = np.zeros(n_graphs)
    exp_err = np.zeros(n_graphs)
    margin = 6
    for i in range(n_graphs):
        Gx, Gy = ds.perturbated_graphs(self.G_params, create, destroy,
                                       pct=False, seed=SEED)
        Ax = Gx.W.todense()
        Ay = Gy.W.todense()
        # Both graphs must stay undirected, connected and loop-free
        for G, A in ((Gx, Ax), (Gy, Ay)):
            self.assertFalse(G.is_directed())
            self.assertTrue(G.is_connected())
            self.assertEqual(np.sum(np.diag(A)), 0)
        diff_links[i] = np.sum(Ax != Ay)/Gx.Ne/2
        # Expectation: create on the absent edges plus destroy on the
        # existing ones, normalized by N.
        exp_err[i] = (create*(Gx.N*(Gx.N-1)/2-Gx.Ne) + Gx.Ne*destroy)/Gx.N
    self.assertTrue(np.mean(diff_links) <= (np.mean(exp_err)+margin)/100)
    self.assertTrue(np.mean(diff_links) >= (np.mean(exp_err)-margin)/100)
def estimate_signals(i, G_params, eps_c, eps_d, n_samples, L, nodes_enc,
                     nodes_dec, ups, feat_enc, feat_dec, feat_only_conv):
    """Compare a least-squares baseline, a conventional autoencoder and a
    graph encoder/decoder on signals diffused over a perturbed graph pair.

    Returns:
        (mean_err, mse, n_params) of the graph encoder/decoder model.
    """
    # Create graphs
    Gx, Gy = data_sets.perturbated_graphs(G_params, eps_c, eps_d,
                                          seed=SEED)

    # Create graph signals
    data = data_sets.LinearDS2GS(Gx, Gy, n_samples, L, 3 * G_params['k'],
                                 median=True)
    data.to_unit_norm()
    print('Median Diff between Y and X:',
          np.median(np.linalg.norm((data.train_X - data.train_Y)**2, 1)))

    # Least-squares linear baseline fitted on the training split
    X = data.train_X
    Beta = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(data.train_Y)
    est_Y_test = data.test_X.dot(Beta)
    test_err = np.sum((est_Y_test - data.test_Y)**2, axis=1) \
        / np.linalg.norm(data.test_Y)**2
    print('Linear model: mean err: {} - median: {}'.format(
        np.mean(test_err), np.median(test_err)))
    data.to_tensor()

    # Obtein clusters
    cluster_x = gc.MultiResGraphClustering(Gx, nodes_enc, k=4,
                                           up_method=None)
    cluster_y = gc.MultiResGraphClustering(Gy, nodes_dec, k=4,
                                           up_method=ups)

    def _train_and_test(net, msg):
        # Fit the given network and report its test performance
        model = Model(net, decay_rate=.9, epochs=25, batch_size=100,
                      learning_rate=0.05, verbose=True, eval_freq=1,
                      max_non_dec=5)
        print('Model parameters: ', model.count_params())
        model.fit(data.train_X, data.train_Y, data.val_X, data.val_Y)
        mean_err, median_err, mse = model.test(data.test_X, data.test_Y)
        print(msg.format(i, Gx.N, mse, mean_err, median_err))
        return mean_err, mse, model.count_params()

    # Standar ConvAutoenc: constant size at every layer
    net = architecture.GraphEncoderDecoder(feat_enc, [Gx.N] * 7,
                                           cluster_x.Ds, feat_dec,
                                           [Gx.N] * 7, cluster_y.Us,
                                           feat_only_conv,
                                           As_dec=cluster_y.As,
                                           last_act_fn=nn.Tanh(),
                                           act_fn=nn.Tanh())
    _train_and_test(net, 'Autoencoder: Graph {}: N: {} Mean MSE: {} - '
                         'Mean Err: {} - Median Err: {}')

    # Graph Autoenc: sizes taken from the multiresolution clusterings
    net = architecture.GraphEncoderDecoder(feat_enc, cluster_x.sizes,
                                           cluster_x.Ds, feat_dec,
                                           cluster_y.sizes, cluster_y.Us,
                                           feat_only_conv,
                                           As_dec=cluster_y.As,
                                           last_act_fn=nn.Tanh(),
                                           act_fn=nn.Tanh())
    return _train_and_test(net, 'GRAPH ENC-DEC Graph {}: N: {} Mean MSE: '
                                '{} - Mean Err: {} - Median Err: {}')
def estimate_signals(id, G_params, n_samples, L, nodes_enc, nodes_dec, ups,
                     feat_enc, feat_dec, feat_only_conv):
    """Fit a linear baseline and a graph encoder/decoder on nonlinear
    diffused signals over a perturbed graph pair.

    NOTE(review): relies on module-level globals (creat, dest, pct,
    deltas, median, same_coeffs, p_n, dr, epochs, bs, lr) — confirm
    they are defined in this script.

    Returns:
        (mean_err, mse, med_err, diff_links, mean_dist, iters).
    """
    # Create graphs
    Gx, Gy = data_sets.perturbated_graphs(G_params, creat, dest, pct=pct,
                                          seed=SEED)
    diff_links = np.sum(Gx.A != Gy.A) / 2 / Gx.Ne * 100
    print('Links different(%):', diff_links)

    # Create graph signals
    data = data_sets.NonLinearDS2GS(Gx, Gy, n_samples, L, deltas,
                                    median=median,
                                    same_coeffs=same_coeffs)
    data.to_unit_norm()
    data.add_noise(p_n, test_only=True)
    mean_dist = np.median(np.linalg.norm(data.train_X - data.train_Y,
                                         axis=1))
    print('Distance signals:', mean_dist)
    data.to_tensor()

    N = G_params['N']
    k = G_params['k']

    # Linear baseline
    model = LinearModel(N)
    model.fit(data.train_X, data.train_Y, data.val_X, data.val_Y)
    mean_err, med_err, mse = model.test(data.test_X, data.test_Y)
    print('LINEAR Graph {}: N: {} Mean MSE: {} - Mean Err: {} - Median Err: {}'.
          format(id, Gx.N, mse, mean_err, med_err))

    # Obtein clusters
    clust_enc = gc.MultiResGraphClustering(Gx, nodes_enc, k=k,
                                           up_method=ups)
    clust_dec = gc.MultiResGraphClustering(Gy, nodes_dec, k=k,
                                           up_method=ups)

    # Graph Autoenc
    net = architecture.GraphEncoderDecoder(feat_enc, clust_enc.sizes,
                                           clust_enc.Ds, feat_dec,
                                           clust_dec.sizes, clust_dec.Us,
                                           feat_only_conv,
                                           As_dec=clust_dec.As,
                                           As_enc=clust_enc.As,
                                           ups=ups, last_act_fn=nn.Tanh(),
                                           downs=ups, act_fn=nn.Tanh())
    model = Model(net, decay_rate=dr, epochs=epochs, batch_size=bs,
                  learning_rate=lr, verbose=False, eval_freq=1,
                  max_non_dec=10)
    print('Model parameters: ', model.count_params())
    iters, _, _ = model.fit(data.train_X, data.train_Y, data.val_X,
                            data.val_Y)
    mean_err, med_err, mse = model.test(data.test_X, data.test_Y)
    print('G: {}, ({}): epochs {} - mse {} - MedianErr: {}'.format(
        id, model.count_params(), iters, mse, med_err))
    return mean_err, mse, med_err, diff_links, mean_dist, iters
def run(id, Gs, signals, lrn, p_n):
    """Train every experiment in EXPS on one perturbed graph pair.

    Returns:
        Three length-N_EXPS arrays: median errors, MSEs and epochs.
    """
    Gx, Gy = ds.perturbated_graphs(Gs['params'], Gs['pct_val'][0],
                                   Gs['pct_val'][1], pct=Gs['pct'],
                                   seed=SEED)
    data = ds.LinearDS2GS(Gx, Gy, signals['samples'], signals['L'],
                          signals['deltas'], median=signals['median'],
                          same_coeffs=signals['same_coeffs'])
    data.to_unit_norm()
    data.add_noise(p_n, test_only=signals['test_only'])
    median_dist = np.median(np.linalg.norm(data.train_X - data.train_Y,
                                           axis=1))
    print('Signal {}: distance {}'.format(id, median_dist))
    data.to_tensor()

    med_err = np.zeros(N_EXPS)
    mse = np.zeros(N_EXPS)
    epochs = np.zeros(N_EXPS)
    for idx, exp in enumerate(EXPS):
        # Multiresolution clusterings drive the encoder/decoder sizes
        clust_x = gc.MultiResGraphClustering(Gx, exp['n_enc'],
                                             k=exp['n_enc'][-1],
                                             up_method=exp['downs'])
        clust_y = gc.MultiResGraphClustering(Gy, exp['n_dec'],
                                             k=exp['n_enc'][-1],
                                             up_method=exp['ups'])
        net = GraphEncoderDecoder(exp['f_enc'], clust_x.sizes, clust_x.Ds,
                                  exp['f_dec'], clust_y.sizes, clust_y.Us,
                                  exp['f_conv'], As_dec=clust_y.As,
                                  As_enc=clust_x.As, act_fn=lrn['af'],
                                  last_act_fn=lrn['laf'], ups=exp['ups'],
                                  downs=exp['downs'])
        model = Model(net, learning_rate=lrn['lr'], decay_rate=lrn['dr'],
                      batch_size=lrn['batch'], epochs=lrn['epochs'],
                      eval_freq=EVAL_F, max_non_dec=lrn['non_dec'],
                      verbose=VERBOSE)
        epochs[idx], _, _ = model.fit(data.train_X, data.train_Y,
                                      data.val_X, data.val_Y)
        _, med_err[idx], mse[idx] = model.test(data.test_X, data.test_Y)
        print('G: {}, {}-{} ({}): epochs {} - mse {} - MedianErr: {}'.format(
            id, idx, exp['type'], model.count_params(), epochs[idx],
            mse[idx], med_err[idx]))
    return med_err, mse, epochs
def run(id, Gs, Signals, lrn, samples):
    """Train one model per experiment in EXPS (linear baseline, graph
    encoder/decoder, conv autoencoder or FC autoencoder) and return the
    parameter counts, median errors and MSEs.

    Raises:
        RuntimeError: for an unrecognized exp['type'].
    """
    Gx, Gy = ds.perturbated_graphs(Gs['params'], Gs['pct_val'][0],
                                   Gs['pct_val'][1], pct=Gs['pct'],
                                   perm=Gs['perm'], seed=SEED)
    data = ds.LinearDS2GSLinksPert(Gx, Gy, samples, Signals['L'],
                                   Signals['deltas'],
                                   median=Signals['median'],
                                   same_coeffs=Signals['same_coeffs'])
    data.to_unit_norm()
    data.add_noise(Signals['noise'], test_only=Signals['test_only'])
    data.to_tensor()

    params = np.zeros(N_EXPS)
    epochs = np.zeros(N_EXPS)
    med_err = np.zeros(N_EXPS)
    mse = np.zeros(N_EXPS)
    for i, exp in enumerate(EXPS):
        exp_type = exp['type']
        if exp_type == 'Linear':
            # The linear baseline has no net/trainer wrapper
            model = LinearModel(exp['N'])
        else:
            if exp_type == 'Enc_Dec':
                clust_x = gc.MultiResGraphClustering(
                    Gx, exp['n_enc'], k=exp['n_enc'][-1],
                    up_method=exp['downs'])
                clust_y = gc.MultiResGraphClustering(
                    Gy, exp['n_dec'], k=exp['n_enc'][-1],
                    up_method=exp['ups'])
                net = GraphEncoderDecoder(
                    exp['f_enc'], clust_x.sizes, clust_x.Ds, exp['f_dec'],
                    clust_y.sizes, clust_y.Us, exp['f_conv'],
                    As_dec=clust_y.As, As_enc=clust_x.As,
                    act_fn=lrn['af'], K_dec=exp['K_dec'],
                    K_enc=exp['K_enc'], last_act_fn=lrn['laf'],
                    ups=exp['ups'], downs=exp['downs'])
            elif exp_type == 'AutoConv':
                net = ConvAutoencoder(exp['f_enc'], exp['kernel_enc'],
                                      exp['f_dec'], exp['kernel_dec'])
            elif exp_type == 'AutoFC':
                net = FCAutoencoder(exp['n_enc'], exp['n_dec'],
                                    bias=exp['bias'])
            else:
                raise RuntimeError('Unknown experiment type')
            model = Model(net, learning_rate=lrn['lr'],
                          decay_rate=lrn['dr'], batch_size=lrn['batch'],
                          epochs=lrn['epochs'], eval_freq=EVAL_F,
                          max_non_dec=lrn['non_dec'], verbose=VERBOSE,
                          early_stop=exp['early_stop'])
        epochs[i], _, _ = model.fit(data.train_X, data.train_Y,
                                    data.val_X, data.val_Y)
        _, med_err[i], mse[i] = model.test(data.test_X, data.test_Y)
        params[i] = model.count_params()
        print('G: {}, {}-{} ({}): epochs {} - mse {} - MedianErr: {}'.format(
            id, i, exp_type, params[i], epochs[i], mse[i], med_err[i]))
    return params, med_err, mse