def test_add_single_result():
    """Exercise ``_add_single_result`` on a single-target results object.

    Three cases are covered: re-adding a target that is already stored and
    adding a target with conflicting settings (both must raise a
    RuntimeError), and adding a target whose settings carry an extra entry,
    which must then show up in the stored settings.
    """
    gauss_data = _get_discrete_gauss_data()
    analysis = MultivariateTE()
    res_network = analysis.analyse_single_target(
        settings=settings, data=gauss_data, target=1)

    # Case 1: results for target 1 already exist.
    with pytest.raises(RuntimeError):
        res_network._add_single_result(
            target=1, settings=res_network.settings, results={})

    # Case 2: settings differ from the ones stored in the results object.
    conflicting_settings = cp.deepcopy(res_network.settings)
    conflicting_settings.add_conditionals = 'Test'
    with pytest.raises(RuntimeError):
        res_network._add_single_result(
            target=0,
            settings=conflicting_settings,
            results=res_network._single_target[1])

    # Case 3: settings contain an additional entry; the stored settings are
    # expected to be extended by it.
    extended_settings = cp.deepcopy(res_network.settings)
    extended_settings.new_setting = 'Test'
    res_network._add_single_result(
        target=0,
        settings=extended_settings,
        results=res_network._single_target[1])
    assert 'new_setting' in res_network.settings.keys(), (
        'Settings dict was not updated.')
    assert res_network.settings.new_setting == 'Test', (
        'Settings dict was not updated correctly.')
def test_analytical_surrogates():
    """Test that the use of analytical surrogates is recorded in the settings.

    Gaussian data are discretised so that the discrete CMI estimator can be
    used; the returned settings must then flag analytical surrogates.
    """
    # Only the first source and the target are needed here.
    _, source1, _, target = _get_gauss_data(covariance=0.4)

    discretiser = JidtDiscreteCMI(
        {'discretise_method': 'equal', 'n_discrete_bins': 5})
    source_dis, target_dis = discretiser._discretise_vars(
        var1=source1, var2=target)
    data = Data(
        np.hstack((source_dis, target_dis)),
        dim_order='sp',
        normalise=False)

    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        # alphabet size of the variables analysed
        'n_discrete_bins': 5,
        'n_perm_max_stat': 100,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
        'max_lag_sources': 5,
        'min_lag_sources': 1,
        'max_lag_target': 5,
    }
    res = MultivariateTE().analyse_single_target(settings, data, target=1)

    # The analysis has to document the analytical surrogate creation.
    assert res.settings.analytical_surrogates, (
        'Surrogates were not created analytically.')
def test_add_single_result():
    """Check validation and settings merging in ``_add_single_result``.

    Adding an existing target or a target with conflicting settings must
    raise a RuntimeError; adding a target with one extra setting must extend
    the settings stored in the results object.
    """
    gauss_data = _generate_gauss_data()
    analysis = MultivariateTE()
    res_network = analysis.analyse_single_target(
        settings=settings, data=gauss_data, target=1)

    # Target 1 has already been analysed, adding it again is an error.
    with pytest.raises(RuntimeError):
        res_network._add_single_result(
            target=1, settings=res_network.settings, results={})

    # Settings that disagree with the stored ones are rejected.
    unequal_settings = cp.deepcopy(res_network.settings)
    unequal_settings.add_conditionals = 'Test'
    with pytest.raises(RuntimeError):
        res_network._add_single_result(
            target=0,
            settings=unequal_settings,
            results=res_network._single_target[1])

    # Settings with an additional entry are accepted and merged.
    additional_settings = cp.deepcopy(res_network.settings)
    additional_settings.new_setting = 'Test'
    res_network._add_single_result(
        target=0,
        settings=additional_settings,
        results=res_network._single_target[1])
    assert 'new_setting' in res_network.settings.keys(), (
        'Settings dict was not updated.')
    assert res_network.settings.new_setting == 'Test', (
        'Settings dict was not updated correctly.')
def test_discrete_input():
    """Test multivariate TE estimation from already discretised data.

    The omnibus TE estimated for target 1 is compared against the value
    returned by ``calculate_mi`` for the expected correlation.
    """
    covariance = 0.4
    data = _get_discrete_gauss_data(
        covariance=covariance, n=10000, delay=1, normalise=False, seed=SEED)

    # Expected correlation between source and target for the given
    # covariance, and the corresponding reference value.
    corr_expected = covariance / np.sqrt(
        covariance**2 + (1 - covariance)**2)
    expected_mi = calculate_mi(corr_expected)

    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'none',
        # alphabet size of the variables analysed
        'n_discrete_bins': 5,
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'n_perm_max_seq': 30,
        'min_lag_sources': 1,
        'max_lag_sources': 2,
        'max_lag_target': 1,
    }
    res = MultivariateTE().analyse_single_target(
        settings=settings, data=data, target=1)

    assert np.isclose(
        res._single_target[1].omnibus_te, expected_mi, atol=0.05), (
            'Estimated TE for discrete variables is not correct. Expected: '
            '{0}, Actual results: {1}.'.format(
                expected_mi, res._single_target[1].omnibus_te))
def test_analytical_surrogates():
    """Test that analytical surrogate creation is flagged in the settings.

    Two correlated Gaussian series with a one-sample delay are generated,
    discretised, and analysed with the discrete CMI estimator.
    """
    covariance = 0.4
    n = 10000
    delay = 1

    # Draw the source first and the target noise second.
    source = np.random.normal(0, 1, size=n)
    noise = np.random.normal(0, 1, size=n)
    target = covariance * source + (1 - covariance) * noise

    # Shift both series against each other by `delay` samples.
    source = source[delay:]
    target = target[:-delay]

    discretiser = JidtDiscreteCMI(
        {'discretise_method': 'equal', 'n_discrete_bins': 5})
    source_dis, target_dis = discretiser._discretise_vars(
        var1=source, var2=target)
    data = Data(
        np.vstack((source_dis, target_dis)),
        dim_order='ps',
        normalise=False)

    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        # alphabet size of the variables analysed
        'n_discrete_bins': 5,
        'n_perm_max_stat': 100,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
        'max_lag_sources': 5,
        'min_lag_sources': 1,
        'max_lag_target': 5,
    }
    res = MultivariateTE().analyse_single_target(settings, data, target=1)
    assert res.settings.analytical_surrogates, (
        'Surrogates were not created analytically.')
def test_export_networkx():
    """Test export of results to networkx DiGraph() objects.

    NOTE(review): ``res_0`` and ``res_within`` are not defined in this
    function; presumably they are module-level results objects -- confirm.
    """
    # Part 1: export a graph that contains unconnected nodes. Only two
    # sources are analysed for a single target; the exported graph is
    # nevertheless expected to contain one node per process in the data.
    max_lag = 3
    data = Data(seed=SEED)
    data.generate_mute_data(500, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'noise_level': 0,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': max_lag,
        'min_lag_sources': 1,
        'max_lag_target': max_lag,
    }
    results = MultivariateTE().analyse_single_target(
        settings, data, target=3, sources=[0, 4])
    adj_matrix = results.get_adjacency_matrix(weights='binary', fdr=False)
    digraph = io.export_networkx_graph(
        adjacency_matrix=adj_matrix, weights='binary')
    np.testing.assert_array_equal(
        np.sort(digraph.nodes),
        np.arange(data.n_processes),
        err_msg='Wrong nodes in exported DiGraph.')

    # Part 2: export of a networkx graph for network inference results.
    adj_matrix = res_0.get_adjacency_matrix(weights='binary', fdr=False)
    io.export_networkx_graph(adjacency_matrix=adj_matrix, weights='binary')

    # Part 3: export of the source graph, with and without restriction to
    # significant sources.
    for sign in (True, False):
        io.export_networkx_source_graph(
            results=res_0, target=1, sign_sources=sign, fdr=False)

    # Part 4: export of networkx graphs for network comparison results.
    for weight in ('union', 'comparison', 'pvalue', 'diff_abs'):
        adj_matrix = res_within.get_adjacency_matrix(weights=weight)
        io.export_networkx_graph(adjacency_matrix=adj_matrix, weights=weight)
        for sign in (True, False):
            io.export_networkx_source_graph(
                results=res_0, target=1, sign_sources=sign, fdr=False)
def test_pickle_results():
    """Test pickling results objects.

    Results of a single-target analysis and of a full network analysis are
    both dumped to a temporary file. The test passes if ``pickle.dump`` does
    not raise.
    """
    data = _generate_gauss_data()
    nw = MultivariateTE()
    res_single = nw.analyse_single_target(
        settings=settings, data=data, target=1)
    res_network = nw.analyse_network(settings=settings, data=data)

    # Use the temporary file as a context manager so that it is closed (and
    # thereby removed) even if pickling fails.
    with TemporaryFile() as outfile:
        pickle.dump(res_single, outfile)
        pickle.dump(res_network, outfile)
def test_plot_network():
    """Test plotting of multivariate TE single-target and network results.

    One source and two targets driven by that source are generated,
    discretised for speed, and analysed. The resulting objects are passed to
    ``plot_network`` and ``plot_selected_vars``; the test passes if plotting
    does not raise. Every created figure is closed again.
    """
    covariance = 0.4
    n = 10000
    delay = 1
    normalisation = False
    source = np.random.normal(0, 1, size=n)
    target_1 = (covariance * source +
                (1 - covariance) * np.random.normal(0, 1, size=n))
    target_2 = (covariance * source +
                (1 - covariance) * np.random.normal(0, 1, size=n))
    # Shift source and targets against each other by `delay` samples.
    source = source[delay:]
    target_1 = target_1[:-delay]
    target_2 = target_2[:-delay]

    # Discretise data for speed
    settings_dis = {'discretise_method': 'equal', 'n_discrete_bins': 5}
    est = JidtDiscreteCMI(settings_dis)
    source_dis, target_1_dis = est._discretise_vars(var1=source,
                                                    var2=target_1)
    source_dis, target_2_dis = est._discretise_vars(var1=source,
                                                    var2=target_2)
    data = Data(np.vstack((source_dis, target_1_dis, target_2_dis)),
                dim_order='ps', normalise=normalisation)

    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'none',
        'n_discrete_bins': 5,  # alphabet size of the variables analysed
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'n_perm_max_seq': 30,
        'min_lag_sources': 1,
        'max_lag_sources': 2,
        'max_lag_target': 1,
        'alpha_fdr': 0.5
    }
    nw = MultivariateTE()

    # Analyse a single target and the whole network
    res_single = nw.analyse_single_target(settings=settings, data=data,
                                          target=1)
    res_network = nw.analyse_network(settings=settings, data=data)

    graph, fig = plot_network(res_single, 'max_te_lag', fdr=False)
    plt.close(fig)
    graph, fig = plot_network(res_network, 'max_te_lag', fdr=False)
    plt.close(fig)

    # Plot selected variables both with and without restriction to
    # significant sources. The loop variable has to be forwarded, otherwise
    # only the `True` case is ever exercised.
    for sign_sources in [True, False]:
        graph, fig = plot_selected_vars(
            res_network, target=1, sign_sources=sign_sources, fdr=False)
        plt.close(fig)
def test_pickle_results():
    """Test pickling results objects.

    Both the single-target results and the network results are written to a
    temporary file with ``pickle.dump``. The test passes if no exception is
    raised while pickling.
    """
    data = _generate_gauss_data()
    nw = MultivariateTE()
    res_single = nw.analyse_single_target(
        settings=settings, data=data, target=1)
    res_network = nw.analyse_network(settings=settings, data=data)

    # The context manager guarantees that the temporary file handle is
    # released, also when one of the dumps raises.
    with TemporaryFile() as outfile:
        pickle.dump(res_single, outfile)
        pickle.dump(res_network, outfile)
def test_multivariate_te_lagged_copies():
    """Test multivariate TE estimation on a lagged copy of random data.

    Two processes of random data are analysed, where the second process is
    built from the first one with its leading ``lag`` samples replaced by new
    random values. The test expects no significant conditionals at all
    (neither in the target's nor in the sources' past).

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    lag = 3
    d_0 = np.random.rand(1, 1000, 20)
    d_1 = np.hstack((np.random.rand(1, lag, 20), d_0[:, lag:, :]))

    data = Data()
    data.set_data(np.vstack((d_0, d_1)), 'psr')
    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'max_ent',
        'max_lag_sources': 5,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 500,
        'n_perm_max_seq': 500,
    }
    random_analysis = MultivariateTE()

    # For either target, the only selected variable should be the mandatory
    # single sample in the target's past (which ensures that a proper TE is
    # calculated at any time in the algorithm).
    for t in range(2):
        results = random_analysis.analyse_single_target(settings, data, t)
        single = results.get_single_target(t, fdr=False)
        assert len(single.selected_vars_full) == 1, (
            'Conditional contains more/less than 1 variables.')
        assert not single.selected_vars_sources.size, (
            'Conditional sources is not empty.')
        assert len(single.selected_vars_target) == 1, (
            'Conditional target contains more/less than 1 variable.')
        assert single.selected_sources_pval is None, (
            'Conditional p-value is not None.')
        assert single.omnibus_pval is None, (
            'Omnibus p-value is not None.')
        assert single.omnibus_sign is None, (
            'Omnibus significance is not None.')
        assert single.selected_sources_te is None, (
            'Conditional TE values is not None.')
def test_multivariate_te_random():
    """Test multivariate TE estimation on two random data sets.

    Two uncoupled processes of random data are analysed in both directions.
    The test expects no significant conditionals at all (neither in the
    target's nor in the sources' past).

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    data = Data()
    data.set_data(np.random.rand(2, 1000, 20), 'psr')
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 5,
        'min_lag_sources': 1,
        'n_perm_max_stat': 200,
        'n_perm_min_stat': 200,
        'n_perm_omnibus': 500,
        'n_perm_max_seq': 500,
    }
    random_analysis = MultivariateTE()

    # For either target, the only selected variable should be the mandatory
    # single sample in the target's past (which ensures that a proper TE is
    # calculated at any time in the algorithm).
    for t in range(2):
        results = random_analysis.analyse_single_target(settings, data, t)
        single = results.get_single_target(t, fdr=False)
        assert len(single.selected_vars_full) == 1, (
            'Conditional contains more/less than 1 variables.')
        assert not single.selected_vars_sources, (
            'Conditional sources is not empty.')
        assert len(single.selected_vars_target) == 1, (
            'Conditional target contains more/less than 1 variable.')
        assert single.selected_sources_pval is None, (
            'Conditional p-value is not None.')
        assert single.omnibus_pval is None, (
            'Omnibus p-value is not None.')
        assert single.omnibus_sign is None, (
            'Omnibus significance is not None.')
        assert single.selected_sources_te is None, (
            'Conditional TE values is not None.')
def test_plot_selected_vars():
    """Test plotting the selected variables of a single-target analysis.

    Target 2 of generated MuTE data is analysed and the results are passed
    to ``vis.plot_selected_vars``; the test passes if nothing raises.
    """
    mute_data = Data()
    mute_data.generate_mute_data(100, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 5,
        'min_lag_sources': 4,
        'n_perm_max_stat': 25,
        'n_perm_min_stat': 25,
        'n_perm_omnibus': 50,
        'n_perm_max_seq': 50,
    }
    res = MultivariateTE().analyse_single_target(
        settings, mute_data, target=2)
    vis.plot_selected_vars(res)
def test_multivariate_te_lorenz_2():
    """Test multivariate TE estimation on bivariately coupled Lorenz systems.

    The example data hold two Lorenz systems with a coupling from the first
    to the second system with delay u = 45 samples. Only the direction of
    coupling (target 1) is analysed here; the results and the binary
    adjacency matrix are printed, nothing is asserted.

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    # Load simulated data from 2 coupled Lorenz systems 1->2, u = 45 ms.
    data_file = os.path.join(
        os.path.dirname(__file__), 'data/lorenz_2_exampledata.npy')
    data = Data()
    data.set_data(np.load(data_file), 'psr')

    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 47,
        'min_lag_sources': 42,
        'max_lag_target': 20,
        'tau_target': 2,
        'n_perm_max_stat': 21,  # 200
        'n_perm_min_stat': 21,  # 200
        'n_perm_omnibus': 21,
        # This should be equal to the min stats b/c we reuse the surrogate
        # table from the min stats.
        'n_perm_max_seq': 21,
    }
    lorenz_analysis = MultivariateTE()

    # FOR DEBUGGING: add the whole history for k = 20, tau = 2 to the
    # estimation, this makes things faster, b/c these don't have to be
    # tested again. Note conditionals are specified using lags; the list
    # covers lags 19, 17, ..., 1 of process 1.
    settings.update({
        'add_conditionals': [(1, lag) for lag in range(19, 0, -2)],
        'max_lag_sources': 60,
        'min_lag_sources': 31,
        'tau_sources': 2,
        'max_lag_target': 1,
        'tau_target': 1,
    })

    # Just analyse the direction of coupling.
    results = lorenz_analysis.analyse_single_target(settings, data, target=1)
    print(results._single_target)
    adj_matrix = results.get_adjacency_matrix(weights='binary', fdr=False)
    adj_matrix.print_matrix()
def test_multivariate_te_lorenz_2():
    """Test multivariate TE estimation on bivariately coupled Lorenz systems.

    The example data hold two Lorenz systems with a coupling from the first
    to the second system with delay u = 45 samples. Only target 1 (the
    direction of coupling) is analysed; the result is printed, nothing is
    asserted.

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    # Load simulated data from 2 coupled Lorenz systems 1->2, u = 45 ms.
    data_file = os.path.join(
        os.path.dirname(__file__), 'data/lorenz_2_exampledata.npy')
    dat = Data()
    dat.set_data(np.load(data_file), 'psr')

    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 47,
        'min_lag_sources': 42,
        'max_lag_target': 20,
        'tau_target': 2,
        'n_perm_max_stat': 21,  # 200
        'n_perm_min_stat': 21,  # 200
        'n_perm_omnibus': 21,
        # This should be equal to the min stats b/c we reuse the surrogate
        # table from the min stats.
        'n_perm_max_seq': 21,
    }
    lorenz_analysis = MultivariateTE()

    # FOR DEBUGGING: add the whole history for k = 20, tau = 2 to the
    # estimation, this makes things faster, b/c these don't have to be
    # tested again. The list covers lags 44, 42, ..., 28 of process 1.
    settings.update({
        'add_conditionals': [(1, lag) for lag in range(44, 26, -2)],
        'max_lag_sources': 60,
        'min_lag_sources': 31,
        'tau_sources': 2,
        'max_lag_target': 0,
        'tau_target': 1,
    })

    # Target 1 is the coupled direction; the uncoupled direction (target 0)
    # and the full network are not analysed here.
    res_1 = lorenz_analysis.analyse_single_target(settings, dat, 1)
    print(res_1)
def test_gauss_data():
    """Test multivariate TE estimation from correlated Gaussians.

    A correlated and an uncorrelated source are offered for one target. The
    test checks that only the correlated source is inferred and that the
    estimated TE agrees both with a direct JIDT TE estimate and with the
    expected value.
    """
    expected_mi, source, source_uncorr, target = _get_gauss_data()

    # Introduce a delay of one sample between the sources and the target.
    source, source_uncorr, target = source[1:], source_uncorr[1:], target[:-1]
    data = Data(
        np.hstack((source, source_uncorr, target)),
        dim_order='sp',
        normalise=False)

    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': 2,
        'min_lag_sources': 1,
    }
    results = MultivariateTE().analyse_single_target(
        settings, data, target=2, sources=[0, 1])
    te = results.get_single_target(2, fdr=False)['te'][0]
    sources = results.get_target_sources(2, fdr=False)

    # Only the correlated source (process 0) should have been detected.
    assert len(sources) == 1, 'Wrong no. inferred sources: {0}.'.format(
        len(sources))
    assert sources[0] == 0, 'Wrong inferred source: {0}.'.format(sources[0])

    # Compare the algorithm's estimate to a direct JIDT TE estimate that
    # mimics the realisations used internally by the algorithm.
    jidt_settings = {
        'history_target': 1,
        'history_source': 1,
        'source_target_delay': 1,
        'normalise': False,
    }
    jidt_cmi = JidtKraskovTE(jidt_settings).estimate(
        source=source, target=target)
    print('Estimated MI: {0:0.6f}, estimated MI using JIDT core estimator: '
          '{1:0.6f} (expected: {2:0.6f}).'.format(te, jidt_cmi, expected_mi))
    assert np.isclose(te, jidt_cmi, atol=0.005), (
        'Estimated MI {0:0.6f} differs from JIDT estimate {1:0.6f} (expected: '
        'MI {2:0.6f}).'.format(te, jidt_cmi, expected_mi))
    assert np.isclose(te, expected_mi, atol=0.05), (
        'Estimated TE {0:0.6f} differs from expected TE {1:0.6f}.'.format(
            te, expected_mi))
def test_multivariate_te_lagged_copies():
    """Test multivariate TE estimation on a lagged copy of random data.

    Two processes of random data are analysed, where the second process is
    built from the first one with its leading ``lag`` samples replaced by new
    random values. The test expects no significant conditionals at all
    (neither in the target's nor in the sources' past).

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    lag = 3
    d_0 = np.random.rand(1, 1000, 20)
    d_1 = np.hstack((np.random.rand(1, lag, 20), d_0[:, lag:, :]))

    dat = Data()
    dat.set_data(np.vstack((d_0, d_1)), 'psr')
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 5,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 500,
        'n_perm_max_seq': 500,
    }
    random_analysis = MultivariateTE()

    # For either target, the only selected variable should be the mandatory
    # single sample in the target's past (which ensures that a proper TE is
    # calculated at any time in the algorithm).
    for target in range(2):
        res = random_analysis.analyse_single_target(settings, dat, target)
        assert len(res['conditional_full']) == 1, (
            'Conditional contains more/less than 1 variables.')
        assert not res['conditional_sources'], (
            'Conditional sources is not empty.')
        assert len(res['conditional_target']) == 1, (
            'Conditional target contains more/less than 1 variable.')
        assert res['cond_sources_pval'] is None, (
            'Conditional p-value is not None.')
        assert res['omnibus_pval'] is None, (
            'Omnibus p-value is not None.')
        assert res['omnibus_sign'] is None, (
            'Omnibus significance is not None.')
        assert res['conditional_sources_te'] is None, (
            'Conditional TE values is not None.')
def test_multivariate_te_lagged_copies():
    """Test multivariate TE estimation on a lagged copy of random data.

    Two processes of random data are analysed, where the second process is
    built from the first one with its leading ``lag`` samples replaced by new
    random values. The test expects no significant conditionals at all
    (neither in the target's nor in the sources' past).

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    lag = 3
    d_0 = np.random.rand(1, 1000, 20)
    d_1 = np.hstack((np.random.rand(1, lag, 20), d_0[:, lag:, :]))

    data = Data()
    data.set_data(np.vstack((d_0, d_1)), 'psr')
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 5,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 500,
        'n_perm_max_seq': 500,
    }
    random_analysis = MultivariateTE()

    # For either target, the only selected variable should be the mandatory
    # single sample in the target's past (which ensures that a proper TE is
    # calculated at any time in the algorithm).
    for t in range(2):
        results = random_analysis.analyse_single_target(settings, data, t)
        single = results.get_single_target(t, fdr=False)
        assert len(single.selected_vars_full) == 1, (
            'Conditional contains more/less than 1 variables.')
        assert not single.selected_vars_sources.size, (
            'Conditional sources is not empty.')
        assert len(single.selected_vars_target) == 1, (
            'Conditional target contains more/less than 1 variable.')
        assert single.selected_sources_pval is None, (
            'Conditional p-value is not None.')
        assert single.omnibus_pval is None, (
            'Omnibus p-value is not None.')
        assert single.omnibus_sign is None, (
            'Omnibus significance is not None.')
        assert single.selected_sources_te is None, (
            'Conditional TE values is not None.')
def test_multivariate_te_lorenz_2():
    """Test multivariate TE estimation on bivariately coupled Lorenz systems.

    The example data hold two Lorenz systems with a coupling from the first
    to the second system with delay u = 45 samples. Only the direction of
    coupling (target 1) is analysed here; the results and the binary
    adjacency matrix are printed, nothing is asserted.

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    # Load simulated data from 2 coupled Lorenz systems 1->2, u = 45 ms.
    data_file = os.path.join(
        os.path.dirname(__file__), 'data/lorenz_2_exampledata.npy')
    data = Data()
    data.set_data(np.load(data_file), 'psr')

    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'max_lag_sources': 47,
        'min_lag_sources': 42,
        'max_lag_target': 20,
        'tau_target': 2,
        'n_perm_max_stat': 21,  # 200
        'n_perm_min_stat': 21,  # 200
        'n_perm_omnibus': 21,
        # This should be equal to the min stats b/c we reuse the surrogate
        # table from the min stats.
        'n_perm_max_seq': 21,
    }
    lorenz_analysis = MultivariateTE()

    # FOR DEBUGGING: add the whole history for k = 20, tau = 2 to the
    # estimation, this makes things faster, b/c these don't have to be
    # tested again. Note conditionals are specified using lags; the list
    # covers lags 19, 17, ..., 1 of process 1.
    settings.update({
        'add_conditionals': [(1, lag) for lag in range(19, 0, -2)],
        'max_lag_sources': 60,
        'min_lag_sources': 31,
        'tau_sources': 2,
        'max_lag_target': 1,
        'tau_target': 1,
    })

    # Just analyse the direction of coupling.
    results = lorenz_analysis.analyse_single_target(settings, data, target=1)
    print(results._single_target)
    print(results.get_adjacency_matrix('binary'))
def test_multivariate_te_random():
    """Test multivariate TE estimation on two random data sets.

    Two uncoupled processes of random data are analysed in both directions.
    The test expects no significant conditionals at all (neither in the
    target's nor in the sources' past).

    Note:
        This test takes several hours and may take one to two days on some
        machines.
    """
    dat = Data()
    dat.set_data(np.random.rand(2, 1000, 20), 'psr')
    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'max_ent',
        'max_lag_sources': 5,
        'n_perm_max_stat': 200,
        'n_perm_min_stat': 200,
        'n_perm_omnibus': 500,
        'n_perm_max_seq': 500,
    }
    random_analysis = MultivariateTE()

    # For either target, the only selected variable should be the mandatory
    # single sample in the target's past (which ensures that a proper TE is
    # calculated at any time in the algorithm).
    for target in range(2):
        res = random_analysis.analyse_single_target(settings, dat, target)
        assert len(res['conditional_full']) == 1, (
            'Conditional contains more/less than 1 variables.')
        assert not res['conditional_sources'], (
            'Conditional sources is not empty.')
        assert len(res['conditional_target']) == 1, (
            'Conditional target contains more/less than 1 variable.')
        assert res['cond_sources_pval'] is None, (
            'Conditional p-value is not None.')
        assert res['omnibus_pval'] is None, (
            'Omnibus p-value is not None.')
        assert res['omnibus_sign'] is None, (
            'Omnibus significance is not None.')
        assert res['conditional_sources_te'] is None, (
            'Conditional TE values is not None.')
def test_discrete_input():
    """Test multivariate TE estimation from discrete data.

    Correlated Gaussian data with a one-sample delay are generated and
    discretised up front; the omnibus TE estimated for target 1 is compared
    against the value returned by ``calculate_mi``.
    """
    covariance = 0.4
    n = 10000
    delay = 1

    # Draw the source first and the target noise second.
    source = np.random.normal(0, 1, size=n)
    noise = np.random.normal(0, 1, size=n)
    target = covariance * source + (1 - covariance) * noise

    # Expected correlation for the given covariance and its reference value.
    corr_expected = covariance / np.sqrt(
        covariance**2 + (1 - covariance)**2)
    expected_mi = calculate_mi(corr_expected)

    # Shift both series against each other by `delay` samples.
    source = source[delay:]
    target = target[:-delay]

    # Discretise the data before handing them to the analysis.
    discretiser = JidtDiscreteCMI(
        {'discretise_method': 'equal', 'n_discrete_bins': 5})
    source_dis, target_dis = discretiser._discretise_vars(
        var1=source, var2=target)
    data = Data(
        np.vstack((source_dis, target_dis)),
        dim_order='ps',
        normalise=False)

    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'none',
        # alphabet size of the variables analysed
        'n_discrete_bins': 5,
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'n_perm_max_seq': 30,
        'min_lag_sources': 1,
        'max_lag_sources': 2,
        'max_lag_target': 1,
    }
    res = MultivariateTE().analyse_single_target(
        settings=settings, data=data, target=1)
    assert np.isclose(
        res._single_target[1].omnibus_te, expected_mi, atol=0.05), (
            'Estimated TE for discrete variables is not correct. Expected: '
            '{0}, Actual results: {1}.'.format(
                expected_mi, res._single_target[1].omnibus_te))
def test_compare_bivariate_and_multivariate_te():
    """Compare bivariate to multivariate TE estimation.

    Both algorithms are run with identical settings on the same Gaussian
    data; the first TE value returned for target 2 must agree between the
    two up to an absolute tolerance of 0.005.
    """
    expected_mi, source, source_uncorr, target = _get_gauss_data(seed=SEED)

    # Introduce a delay of one sample between the sources and the target.
    source, source_uncorr, target = source[1:], source_uncorr[1:], target[:-1]
    data = Data(
        np.hstack((source, source_uncorr, target)),
        dim_order='sp',
        normalise=False,
        seed=SEED)

    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': 1,
        'min_lag_sources': 1,
        'max_lag_target': 1,
    }

    # Bivariate estimate first, multivariate estimate second.
    results_bivar = BivariateTE().analyse_single_target(
        settings, data, target=2, sources=[0, 1])
    te_bivar = results_bivar.get_single_target(2, fdr=False)['te'][0]

    results_multivar = MultivariateTE().analyse_single_target(
        settings, data, target=2, sources=[0, 1])
    te_multivar = results_multivar.get_single_target(2, fdr=False)['te'][0]

    print('Estimated TE: {0:0.6f}, estimated TE using multivariate algorithm: '
          '{1:0.6f} (expected: ~ {2:0.6f}).'.format(
              te_bivar, te_multivar, expected_mi))
    assert np.isclose(te_bivar, te_multivar, atol=0.005), (
        'Estimated TE {0:0.6f} differs from multivariate estimate {1:0.6f} '
        '(expected: TE {2:0.6f}).'.format(te_bivar, te_multivar, expected_mi))
def test_discrete_input():
    """Test multivariate TE estimation from discrete data.

    Correlated Gaussian data with a one-sample delay are generated and
    discretised before the analysis; the omnibus TE estimated for target 1
    has to match the value returned by ``calculate_mi`` up to 0.05.
    """
    covariance = 0.4
    n = 10000
    delay = 1

    # Draw the source first and the target noise second.
    source = np.random.normal(0, 1, size=n)
    target_noise = np.random.normal(0, 1, size=n)
    target = covariance * source + (1 - covariance) * target_noise

    # Expected correlation for the given covariance and its reference value.
    corr_expected = covariance / np.sqrt(
        covariance**2 + (1 - covariance)**2)
    expected_mi = calculate_mi(corr_expected)

    # Shift both series against each other by `delay` samples.
    source = source[delay:]
    target = target[:-delay]

    # Discretise the data before handing them to the analysis.
    discretisation = {'discretise_method': 'equal', 'n_discrete_bins': 5}
    source_dis, target_dis = JidtDiscreteCMI(discretisation)._discretise_vars(
        var1=source, var2=target)
    data = Data(
        np.vstack((source_dis, target_dis)),
        dim_order='ps',
        normalise=False)

    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'none',
        # alphabet size of the variables analysed
        'n_discrete_bins': 5,
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'n_perm_max_seq': 30,
        'min_lag_sources': 1,
        'max_lag_sources': 2,
        'max_lag_target': 1,
    }
    nw = MultivariateTE()
    res = nw.analyse_single_target(settings=settings, data=data, target=1)
    assert np.isclose(
        res._single_target[1].omnibus_te, expected_mi, atol=0.05), (
            'Estimated TE for discrete variables is not correct. Expected: '
            '{0}, Actual results: {1}.'.format(
                expected_mi, res._single_target[1].omnibus_te))
def test_multivariate_te_corr_gaussian(estimator=None):
    """Test multivariate TE estimation on correlated Gaussians.

    Run the multivariate TE algorithm on two sets of random Gaussian data with
    a given covariance. The second data set is shifted by one sample creating
    a source-target delay of one sample. This example is modeled after the
    JIDT demo 4 for transfer entropy. The resulting TE can be compared to the
    analytical result (but expect some error in the estimate).

    The simulated delay is 1 sample, i.e., the algorithm should find
    significant TE from sample (0, 1), a sample in process 0 with lag/delay 1.
    The final target sample should always be (1, 1), the mandatory sample at
    lag 1, because there is no memory in the process.

    Note:
        This test runs considerably faster than other system tests.
        This produces strange small values for non-coupled sources.  TODO

    Args:
        estimator : str | None
            name of the CMI estimator passed on as 'cmi_estimator'; defaults
            to 'JidtKraskovCMI' if None
    """
    if estimator is None:
        estimator = 'JidtKraskovCMI'

    n = 1000
    cov = 0.4
    # Draw all source samples first, then all noise samples, so that the
    # order of random draws is the same as in a sample-wise construction.
    source = np.array([rn.normalvariate(0, 1) for _ in range(n)])
    noise = np.array([rn.normalvariate(0, 1) for _ in range(n)])
    target = cov * source + (1 - cov) * noise

    # Stack processes row-wise ('ps') and shift them against each other by
    # one sample to create the source-target delay.
    data = Data(normalise=True)
    data.set_data(np.vstack((source[1:], target[:-1])), 'ps')
    settings = {
        'cmi_estimator': estimator,
        'max_lag_sources': 5,
        'min_lag_sources': 1,
        'max_lag_target': 5,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
    }
    random_analysis = MultivariateTE()
    results = random_analysis.analyse_single_target(settings, data, 1)

    # Assert that there are significant conditionals from the source for
    # target 1. Original author's note: for 500 repetitions the mean errors
    # were 0.02097686 and 0.01454073 for examples 1 and 2 respectively, the
    # maximum errors were 0.093841 and 0.05833172 respectively. This inspired
    # the error boundary of 0.1 used below.
    corr_expected = cov / (1 * np.sqrt(cov**2 + (1-cov)**2))
    expected_res = calculate_mi(corr_expected)
    estimated_res = results.get_single_target(1, fdr=False).omnibus_te
    diff = np.abs(estimated_res - expected_res)
    print('Expected source sample: (0, 1)\nExpected target sample: (1, 1)')
    # The relative error is reported in percent, hence the factor of 100.
    print(('Estimated TE: {0:5.4f}, analytical result: {1:5.4f}, error:'
           '{2:2.2f} % ').format(
               estimated_res, expected_res, 100 * diff / expected_res))
    assert (diff < 0.1), ('Multivariate TE calculation for correlated '
                          'Gaussians failed (error larger 0.1: {0}, expected: '
                          '{1}, actual: {2}).'.format(
                              diff, expected_res, estimated_res))
from idtxl.multivariate_te import MultivariateTE
from idtxl.data import Data

# Script section: time a multivariate TE analysis of both targets of the
# Lorenz example data and store results and runtime in the 'output' folder
# next to this file.
start_time = time.time()

# load simulated data from 2 coupled Lorenz systems 1->2, u = 45 ms
d = np.load(
    os.path.join(os.path.dirname(__file__), 'data/lorenz_2_exampledata.npy'))
dat = Data()
# Only the first 100 entries along the last axis are used; with dimension
# order 'psr' these are replications.
dat.set_data(d[:, :, 0:100], 'psr')
settings = {
    'cmi_estimator': 'JidtKraskovCMI',
    'max_lag_sources': 50,
    'min_lag_sources': 40,
    'max_lag_target': 30,
    'tau_sources': 1,
    'tau_target': 3,
    'n_perm_max_stat': 200,
    'n_perm_min_stat': 200,
    'n_perm_omnibus': 500,
    'n_perm_max_seq': 500,
}
lorenz_analysis = MultivariateTE()
res_1 = lorenz_analysis.analyse_single_target(settings, dat, 0)
res_2 = lorenz_analysis.analyse_single_target(settings, dat, 1)
runtime = time.time() - start_time
print("---- {0} minutes".format(runtime / 60))

# Build the output location with os.path.join: os.path.dirname() returns the
# directory without a trailing separator, so plain string concatenation
# would yield '<dir>output/' instead of '<dir>/output/'.
path = os.path.join(os.path.dirname(__file__), 'output')
np.savez(os.path.join(path, 'test_lorenz'), res_1, res_2)
np.save(os.path.join(path, 'test_lorenz_time'), runtime)
def test_multivariate_te_init():
    """Check that MultivariateTE rejects invalid settings and arguments.

    Runs analyse_single_target() with a sequence of settings, targets and
    sources. Two configurations are expected to run without error; all others
    are expected to raise a RuntimeError.
    """
    nw = MultivariateTE()
    settings = {
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'max_lag_sources': 7,
        'min_lag_sources': 2,
        'max_lag_target': 5}

    def expect_error(data, target=1, **optional_args):
        # Run the analysis on the current settings and require a RuntimeError.
        with pytest.raises(RuntimeError):
            nw.analyse_single_target(
                settings=settings, data=data, target=target, **optional_args)

    # No estimator has been specified yet.
    expect_error(Data())

    settings['cmi_estimator'] = 'JidtKraskovCMI'
    data = Data()
    data.generate_mute_data(n_samples=10, n_replications=5)

    # Valid: max lag sources bigger than max lag target.
    nw.analyse_single_target(settings=settings, data=data, target=1)
    # Valid: max lag sources smaller than max lag target.
    settings['max_lag_sources'] = 3
    nw.analyse_single_target(settings=settings, data=data, target=1)

    # Every entry is applied on top of the settings left behind by the previous
    # entry, so the order of the entries matters. Each resulting configuration
    # has to be rejected.
    invalid_changes = [
        # min lag sources bigger than max lag
        {'min_lag_sources': 8, 'max_lag_sources': 7, 'max_lag_target': 5},
        # taus bigger than lags
        {'min_lag_sources': 2, 'max_lag_sources': 4, 'max_lag_target': 5,
         'tau_sources': 10},
        {'tau_sources': 1, 'tau_target': 10},
        # negative lags or taus
        {'min_lag_sources': 1, 'max_lag_target': 5, 'max_lag_sources': -7,
         'tau_target': 1},
        {'max_lag_sources': 7, 'min_lag_sources': -4},
        {'min_lag_sources': 4, 'max_lag_target': -1},
        {'max_lag_target': 5, 'tau_sources': -1},
        {'tau_sources': 1, 'tau_target': -1},
        # lags or taus are no integers
        {'tau_target': 1, 'min_lag_sources': 1.5},
        {'min_lag_sources': 1, 'max_lag_sources': 1.5},
        {'max_lag_sources': 7, 'tau_sources': 1.5},
        {'tau_sources': 1, 'tau_target': 1.5},
    ]
    for changes in invalid_changes:
        settings.update(changes)
        expect_error(data)
    settings['tau_target'] = 1

    # Invalid targets: no int, negative, not in data, wrong type.
    for bad_target in (1.5, -1, 10, {}):
        expect_error(data, target=bad_target)
    # Invalid sources: negative (int and list), not in data (int and list).
    for bad_sources in (-1, [-1], 20, [20]):
        expect_error(data, target=0, sources=bad_sources)
def test_results_network_inference():
    """Test results class for multivariate TE network inference.

    Generates one source and two targets that are coupled to the source with
    a delay of one sample, discretises the data, and runs single-target and
    whole-network analyses with MultivariateTE, BivariateTE, MultivariateMI
    and BivariateMI. Checks the estimated omnibus values against the
    analytical expectation and the data properties and adjacency matrix shape
    stored in the results.

    Note:
        The analysis settings are read from the module-level name 'settings'.
    """
    covariance = 0.4
    n = 10000
    delay = 1
    normalisation = False
    source = np.random.normal(0, 1, size=n)
    target_1 = (covariance * source + (1 - covariance) *
                np.random.normal(0, 1, size=n))
    target_2 = (covariance * source + (1 - covariance) *
                np.random.normal(0, 1, size=n))
    corr_expected = covariance / (
        1 * np.sqrt(covariance**2 + (1 - covariance)**2))
    expected_mi = calculate_mi(corr_expected)
    source = source[delay:]
    target_1 = target_1[:-delay]
    target_2 = target_2[:-delay]

    # Discretise data for speed
    settings_dis = {'discretise_method': 'equal',
                    'n_discrete_bins': 5}
    est = JidtDiscreteCMI(settings_dis)
    source_dis, target_1_dis = est._discretise_vars(var1=source, var2=target_1)
    source_dis, target_2_dis = est._discretise_vars(var1=source, var2=target_2)
    data = Data(np.vstack((source_dis, target_1_dis, target_2_dis)),
                dim_order='ps', normalise=normalisation)

    nw = MultivariateTE()
    # TE - single target
    res_single_multi_te = nw.analyse_single_target(
        settings=settings, data=data, target=1)
    # TE whole network
    res_network_multi_te = nw.analyse_network(settings=settings, data=data)

    nw = BivariateTE()
    # TE - single target
    res_single_biv_te = nw.analyse_single_target(
        settings=settings, data=data, target=1)
    # TE whole network
    res_network_biv_te = nw.analyse_network(settings=settings, data=data)

    nw = MultivariateMI()
    # MI - single target
    res_single_multi_mi = nw.analyse_single_target(
        settings=settings, data=data, target=1)
    # MI whole network
    res_network_multi_mi = nw.analyse_network(settings=settings, data=data)

    nw = BivariateMI()
    # MI - single target
    res_single_biv_mi = nw.analyse_single_target(
        settings=settings, data=data, target=1)
    # MI whole network
    res_network_biv_mi = nw.analyse_network(settings=settings, data=data)

    res_te = [res_single_multi_te, res_network_multi_te, res_single_biv_te,
              res_network_biv_te]
    res_mi = [res_single_multi_mi, res_network_multi_mi, res_single_biv_mi,
              res_network_biv_mi]
    res_all = res_te + res_mi

    # Check estimated values
    for res in res_te:
        est_te = res._single_target[1].omnibus_te
        assert np.isclose(est_te, expected_mi, atol=0.05), (
            'Estimated TE for discrete variables is not correct. Expected: '
            '{0}, Actual results: {1}.'.format(expected_mi, est_te))
    for res in res_mi:
        est_mi = res._single_target[1].omnibus_mi
        assert np.isclose(est_mi, expected_mi, atol=0.05), (
            'Estimated MI for discrete variables is not correct. Expected: '
            '{0}, Actual results: {1}.'.format(expected_mi, est_mi))

    # Target 2 is only available from the whole-network analyses.
    est_te = res_network_multi_te._single_target[2].omnibus_te
    assert np.isclose(est_te, expected_mi, atol=0.05), (
        'Estimated TE for discrete variables is not correct. Expected: {0}, '
        'Actual results: {1}.'.format(expected_mi, est_te))
    est_mi = res_network_multi_mi._single_target[2].omnibus_mi
    assert np.isclose(est_mi, expected_mi, atol=0.05), (
        'Estimated MI for discrete variables is not correct. Expected: {0}, '
        'Actual results: {1}.'.format(expected_mi, est_mi))

    # Check data parameters in results objects
    n_nodes = 3
    n_realisations = n - delay - max(
        settings['max_lag_sources'], settings['max_lag_target'])
    for res in res_all:
        assert res.data_properties.n_nodes == n_nodes, 'Incorrect no. nodes.'
        assert res.data_properties.n_realisations == n_realisations, (
            'Incorrect no. realisations.')
        assert res.data_properties.normalised == normalisation, (
            'Incorrect value for data normalisation.')
        adj_matrix = res.get_adjacency_matrix('binary', fdr=False)
        assert adj_matrix.shape[0] == n_nodes, (
            'Incorrect number of rows in adjacency matrix.')
        assert adj_matrix.shape[1] == n_nodes, (
            'Incorrect number of columns in adjacency matrix.')
def test_delay_reconstruction():
    """Check that source-target delays are recovered from the results.

    One source drives three targets with delays of 1, 3 and 5 samples. After
    analysing each target, the 'max_te_lag' adjacency matrix has to contain
    the simulated delays and the omnibus TE has to be close to the analytical
    expectation.
    """
    normalisation = False
    n = 10000
    delay_1, delay_2, delay_3 = 1, 3, 5
    covariance = 0.4
    noise_weight = 1 - covariance
    corr_expected = covariance / (
        1 * np.sqrt(covariance**2 + noise_weight**2))
    expected_mi = calculate_mi(corr_expected)

    # Simulate the source and three noisy copies of it.
    source = np.random.normal(0, 1, size=n)
    target_1 = (covariance * source
                + noise_weight * np.random.normal(0, 1, size=n))
    target_2 = (covariance * source
                + noise_weight * np.random.normal(0, 1, size=n))
    target_3 = (covariance * source
                + noise_weight * np.random.normal(0, 1, size=n))

    # Shift the series against each other to create the delays; all series
    # end up with the same length.
    source = source[delay_3:]
    target_1 = target_1[(delay_3 - delay_1):-delay_1]
    target_2 = target_2[(delay_3 - delay_2):-delay_2]
    target_3 = target_3[:-delay_3]

    # Discretise data for speed. The discretised source is taken from the last
    # call.
    est = JidtDiscreteCMI(
        {'discretise_method': 'equal', 'n_discrete_bins': 5})
    _, target_1_dis = est._discretise_vars(var1=source, var2=target_1)
    _, target_2_dis = est._discretise_vars(var1=source, var2=target_2)
    source_dis, target_3_dis = est._discretise_vars(var1=source,
                                                    var2=target_3)
    stacked = np.vstack(
        (source_dis, target_1_dis, target_2_dis, target_3_dis))
    data = Data(stacked, dim_order='ps', normalise=normalisation)

    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'none',
        'n_discrete_bins': 5,  # alphabet size of the variables analysed
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'n_perm_max_seq': 30,
        'min_lag_sources': 1,
        'max_lag_sources': delay_3 + 1,
        'max_lag_target': 1
    }
    nw = MultivariateTE()
    res_network = nw.analyse_single_target(
        settings=settings, data=data, target=1)
    for further_target in (2, 3):
        res_network.combine_results(nw.analyse_single_target(
            settings=settings, data=data, target=further_target))

    adj_mat = res_network.get_adjacency_matrix('max_te_lag', fdr=False)
    adj_mat.print_matrix()
    assert adj_mat._weight_matrix[0, 1] == delay_1, (
        'Estimate for delay 1 is not correct.')
    assert adj_mat._weight_matrix[0, 2] == delay_2, (
        'Estimate for delay 2 is not correct.')
    assert adj_mat._weight_matrix[0, 3] == delay_3, (
        'Estimate for delay 3 is not correct.')

    for target in (1, 2, 3):
        est_te = res_network._single_target[target].omnibus_te
        assert np.isclose(est_te, expected_mi, atol=0.05), (
            'Estimated TE for target {0} is not correct. Expected: {1}, '
            'Actual results: {2}.'.format(target, expected_mi, est_te))
def test_multivariate_te_init():
    """Check that MultivariateTE rejects invalid settings and arguments.

    Runs analyse_single_target() with a sequence of settings, targets and
    sources. Two configurations are expected to run without error; all others
    are expected to raise a RuntimeError.
    """
    nw = MultivariateTE()
    settings = {
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'max_lag_sources': 7,
        'min_lag_sources': 2,
        'max_lag_target': 5
    }

    def expect_error(data, target=1, **optional_args):
        # Run the analysis on the current settings and require a RuntimeError.
        with pytest.raises(RuntimeError):
            nw.analyse_single_target(
                settings=settings, data=data, target=target, **optional_args)

    # No estimator has been specified yet.
    expect_error(Data())

    settings['cmi_estimator'] = 'JidtKraskovCMI'
    data = Data()
    data.generate_mute_data(n_samples=10, n_replications=5)

    # Valid: max lag sources bigger than max lag target.
    nw.analyse_single_target(settings=settings, data=data, target=1)
    # Valid: max lag sources smaller than max lag target.
    settings['max_lag_sources'] = 3
    nw.analyse_single_target(settings=settings, data=data, target=1)

    # Every entry is applied on top of the settings left behind by the previous
    # entry, so the order of the entries matters. Each resulting configuration
    # has to be rejected.
    invalid_changes = [
        # min lag sources bigger than max lag
        {'min_lag_sources': 8, 'max_lag_sources': 7, 'max_lag_target': 5},
        # taus bigger than lags
        {'min_lag_sources': 2, 'max_lag_sources': 4, 'max_lag_target': 5,
         'tau_sources': 10},
        {'tau_sources': 1, 'tau_target': 10},
        # negative lags or taus
        {'min_lag_sources': 1, 'max_lag_target': 5, 'max_lag_sources': -7,
         'tau_target': 1},
        {'max_lag_sources': 7, 'min_lag_sources': -4},
        {'min_lag_sources': 4, 'max_lag_target': -1},
        {'max_lag_target': 5, 'tau_sources': -1},
        {'tau_sources': 1, 'tau_target': -1},
        # lags or taus are no integers
        {'tau_target': 1, 'min_lag_sources': 1.5},
        {'min_lag_sources': 1, 'max_lag_sources': 1.5},
        {'max_lag_sources': 7, 'tau_sources': 1.5},
        {'tau_sources': 1, 'tau_target': 1.5},
    ]
    for changes in invalid_changes:
        settings.update(changes)
        expect_error(data)
    settings['tau_target'] = 1

    # Invalid targets: no int, negative, not in data, wrong type.
    for bad_target in (1.5, -1, 10, {}):
        expect_error(data, target=bad_target)
    # Invalid sources: negative (int and list), not in data (int and list).
    for bad_sources in (-1, [-1], 20, [20]):
        expect_error(data, target=0, sources=bad_sources)
region_ix = [range(1350, 1600), range(2350, 2600)] else: sys.exit() data = stack_data(region_ix) # b) Initialise analysis object and define settings network_analysis = MultivariateTE() settings = { 'cmi_estimator': 'JidtKraskovCMI', 'history_target': 1, 'n_perm_max_stat': 100, 'alpha_max_stat': 0.05, 'permute_in_time': True, 'max_lag_sources': 5, 'min_lag_sources': 1, 'add_conditionals': [(0, 1), (0, 2), (1, 1), (1, 2)] } # c) Run analysis sources = list(range(2, 20)) sources.remove(target_id) res = network_analysis.analyse_single_target(settings=settings, data=data, target=target_id, sources=sources) # Save results dictionary using pickle path = 'target_results/{}_res.{}.pkl'.format(region, str(target_id)) pickle.dump(res, open(path, 'wb'))
"""Plot graph output from multivariate TE estimation.

Demo script: generates example data, runs multivariate TE estimation for a
single target and for the whole network, and passes both results to the
plotting functions in idtxl.visualise_graph.

author: patricia
"""
from idtxl.data import Data
from idtxl.multivariate_te import MultivariateTE
from idtxl import visualise_graph

# Generate some example output
data = Data()
data.generate_mute_data(n_replications=2, n_samples=500)
print('Demo data with {0} procs, {1} samples, {2} reps.'.format(
    data.n_processes, data.n_samples, data.n_replications))
# Settings shared by the single-target and the whole-network analysis.
settings = {
    'cmi_estimator': 'JidtKraskovCMI',
    'max_lag_sources': 3,
    'max_lag_target': 3,
    'min_lag_sources': 1
}
mte = MultivariateTE()
# Analyse target 3 only, then all targets in the data.
res_single = mte.analyse_single_target(settings=settings, data=data, target=3)
res_full = mte.analyse_network(settings=settings, data=data)

# generate graph plots
g_single = visualise_graph.plot_selected_vars(res_single, mte)
g_full = visualise_graph.plot_network(res_full)
def test_results_network_inference():
    """Test results class for multivariate TE network inference.

    Generates one source and two targets that are coupled to the source with
    a delay of one sample, discretises the data, and runs single-target and
    whole-network analyses with MultivariateTE, BivariateTE, MultivariateMI
    and BivariateMI. Checks the estimated omnibus values against the
    analytical expectation and the data properties and adjacency matrix shape
    stored in the results.

    Note:
        The analysis settings are read from the module-level name 'settings'.
    """
    covariance = 0.4
    n = 10000
    delay = 1
    normalisation = False
    source = np.random.normal(0, 1, size=n)
    target_1 = (covariance * source +
                (1 - covariance) * np.random.normal(0, 1, size=n))
    target_2 = (covariance * source +
                (1 - covariance) * np.random.normal(0, 1, size=n))
    corr_expected = covariance / (
        1 * np.sqrt(covariance**2 + (1 - covariance)**2))
    expected_mi = calculate_mi(corr_expected)
    source = source[delay:]
    target_1 = target_1[:-delay]
    target_2 = target_2[:-delay]

    # Discretise data for speed
    settings_dis = {'discretise_method': 'equal', 'n_discrete_bins': 5}
    est = JidtDiscreteCMI(settings_dis)
    source_dis, target_1_dis = est._discretise_vars(var1=source, var2=target_1)
    source_dis, target_2_dis = est._discretise_vars(var1=source, var2=target_2)
    data = Data(np.vstack((source_dis, target_1_dis, target_2_dis)),
                dim_order='ps',
                normalise=normalisation)

    nw = MultivariateTE()
    # TE - single target
    res_single_multi_te = nw.analyse_single_target(settings=settings,
                                                   data=data,
                                                   target=1)
    # TE whole network
    res_network_multi_te = nw.analyse_network(settings=settings, data=data)

    nw = BivariateTE()
    # TE - single target
    res_single_biv_te = nw.analyse_single_target(settings=settings,
                                                 data=data,
                                                 target=1)
    # TE whole network
    res_network_biv_te = nw.analyse_network(settings=settings, data=data)

    nw = MultivariateMI()
    # MI - single target
    res_single_multi_mi = nw.analyse_single_target(settings=settings,
                                                   data=data,
                                                   target=1)
    # MI whole network
    res_network_multi_mi = nw.analyse_network(settings=settings, data=data)

    nw = BivariateMI()
    # MI - single target
    res_single_biv_mi = nw.analyse_single_target(settings=settings,
                                                 data=data,
                                                 target=1)
    # MI whole network
    res_network_biv_mi = nw.analyse_network(settings=settings, data=data)

    res_te = [
        res_single_multi_te, res_network_multi_te, res_single_biv_te,
        res_network_biv_te
    ]
    res_mi = [
        res_single_multi_mi, res_network_multi_mi, res_single_biv_mi,
        res_network_biv_mi
    ]
    res_all = res_te + res_mi

    # Check estimated values
    for res in res_te:
        est_te = res._single_target[1].omnibus_te
        assert np.isclose(est_te, expected_mi, atol=0.05), (
            'Estimated TE for discrete variables is not correct. Expected: '
            '{0}, Actual results: {1}.'.format(expected_mi, est_te))
    for res in res_mi:
        est_mi = res._single_target[1].omnibus_mi
        assert np.isclose(est_mi, expected_mi, atol=0.05), (
            'Estimated MI for discrete variables is not correct. Expected: '
            '{0}, Actual results: {1}.'.format(expected_mi, est_mi))

    # Target 2 is only available from the whole-network analyses.
    est_te = res_network_multi_te._single_target[2].omnibus_te
    assert np.isclose(est_te, expected_mi, atol=0.05), (
        'Estimated TE for discrete variables is not correct. Expected: {0}, '
        'Actual results: {1}.'.format(expected_mi, est_te))
    est_mi = res_network_multi_mi._single_target[2].omnibus_mi
    assert np.isclose(est_mi, expected_mi, atol=0.05), (
        'Estimated MI for discrete variables is not correct. Expected: {0}, '
        'Actual results: {1}.'.format(expected_mi, est_mi))

    # Check data parameters in results objects
    n_nodes = 3
    n_realisations = n - delay - max(settings['max_lag_sources'],
                                     settings['max_lag_target'])
    for res in res_all:
        assert res.data_properties.n_nodes == n_nodes, 'Incorrect no. nodes.'
        assert res.data_properties.n_realisations == n_realisations, (
            'Incorrect no. realisations.')
        assert res.data_properties.normalised == normalisation, (
            'Incorrect value for data normalisation.')
        adj_matrix = res.get_adjacency_matrix('binary', fdr=False)
        assert adj_matrix._edge_matrix.shape[0] == n_nodes, (
            'Incorrect number of rows in adjacency matrix.')
        assert adj_matrix._edge_matrix.shape[1] == n_nodes, (
            'Incorrect number of columns in adjacency matrix.')
def test_return_local_values():
    """Test estimation of local values.

    Runs the same single-target analysis twice, once with local values and
    once with averaged values. Checks type and dimensions of the local TE
    estimate and, for every source inferred in both runs with identical
    selected variables, that the mean of the local values is close to the
    averaged estimate.

    The test returns without checking anything if no TE estimate is available
    for one of the two runs.
    """
    max_lag = 5
    data = Data()
    data.generate_mute_data(500, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'noise_level': 0,
        'local_values': True,  # request calculation of local values
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': max_lag,
        'min_lag_sources': 4,
        'max_lag_target': max_lag
    }
    target = 3
    sources = [0, 4]
    te = MultivariateTE()
    results = te.analyse_single_target(settings,
                                       data,
                                       target=target,
                                       sources=sources)
    settings['local_values'] = False
    results_avg = te.analyse_single_target(settings,
                                           data,
                                           target=target,
                                           sources=sources)

    res_local = results.get_single_target(target, fdr=False)
    res_avg = results_avg.get_single_target(target, fdr=False)

    # Test if any sources were inferred. If not, return (this may happen
    # sometimes due to too few samples, however, a higher no. samples is not
    # feasible for a unit test).
    if res_local['te'] is None or res_avg['te'] is None:
        return

    lte = res_local['te']
    n_sources = len(results.get_target_sources(target, fdr=False))
    assert isinstance(lte, np.ndarray), (
        'LTE estimation did not return an array of values: {0}'.format(lte))
    assert lte.shape[0] == n_sources, (
        'Wrong dim (no. sources) in LTE estimate: {0}'.format(lte.shape))
    assert lte.shape[1] == data.n_realisations_samples((0, max_lag)), (
        'Wrong dim (no. samples) in LTE estimate: {0}'.format(lte.shape))
    assert lte.shape[2] == data.n_replications, (
        'Wrong dim (no. replications) in LTE estimate: {0}'.format(lte.shape))

    # Check if average and mean local values are the same. Test each source
    # separately. Inferred sources and variables may differ between the two
    # calls to analyse_single_target() due to low number of surrogates used in
    # unit testing.
    te_single_link = res_avg['te']
    sources_local = results.get_target_sources(target, fdr=False)
    sources_avg = results_avg.get_target_sources(target, fdr=False)
    for s in set(sources_avg).intersection(sources_local):
        i1 = np.where(sources_avg == s)[0][0]
        i2 = np.where(sources_local == s)[0][0]
        # Skip comparison if inferred variables differ between links.
        vars_local = [
            v for v in res_local.selected_vars_sources if v[0] == s]
        vars_avg = [
            v for v in res_avg.selected_vars_sources if v[0] == s]
        if vars_local != vars_avg:
            continue
        mean_lte = np.mean(lte[i2, :, :])
        print('Compare average ({0:.4f}) and local values ({1:.4f}).'.format(
            te_single_link[i1], mean_lte))
        assert np.isclose(te_single_link[i1], mean_lte, rtol=0.00005), (
            'Single link average TE ({0:.6f}) and mean LTE ({1:.6f}) '
            'deviate.'.format(te_single_link[i1], mean_lte))
def test_delay_reconstruction():
    """Check that source-target delays are recovered from the results.

    One source drives three targets with delays of 1, 3 and 5 samples. After
    analysing each target, the 'max_te_lag' adjacency matrix has to contain
    the simulated delays and the omnibus TE has to be close to the analytical
    expectation.
    """
    normalisation = False
    n = 10000
    delay_1, delay_2, delay_3 = 1, 3, 5
    covariance = 0.4
    noise_weight = 1 - covariance
    corr_expected = covariance / (
        1 * np.sqrt(covariance**2 + noise_weight**2))
    expected_mi = calculate_mi(corr_expected)

    # Simulate the source and three noisy copies of it.
    source = np.random.normal(0, 1, size=n)
    target_1 = (covariance * source
                + noise_weight * np.random.normal(0, 1, size=n))
    target_2 = (covariance * source
                + noise_weight * np.random.normal(0, 1, size=n))
    target_3 = (covariance * source
                + noise_weight * np.random.normal(0, 1, size=n))

    # Shift the series against each other to create the delays; all series
    # end up with the same length.
    source = source[delay_3:]
    target_1 = target_1[(delay_3 - delay_1):-delay_1]
    target_2 = target_2[(delay_3 - delay_2):-delay_2]
    target_3 = target_3[:-delay_3]

    # Discretise data for speed. The discretised source is taken from the last
    # call.
    est = JidtDiscreteCMI(
        {'discretise_method': 'equal', 'n_discrete_bins': 5})
    _, target_1_dis = est._discretise_vars(var1=source, var2=target_1)
    _, target_2_dis = est._discretise_vars(var1=source, var2=target_2)
    source_dis, target_3_dis = est._discretise_vars(var1=source,
                                                    var2=target_3)
    stacked = np.vstack(
        (source_dis, target_1_dis, target_2_dis, target_3_dis))
    data = Data(stacked, dim_order='ps', normalise=normalisation)

    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'none',
        'n_discrete_bins': 5,  # alphabet size of the variables analysed
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'n_perm_max_seq': 30,
        'min_lag_sources': 1,
        'max_lag_sources': delay_3 + 1,
        'max_lag_target': 1}
    nw = MultivariateTE()
    res_network = nw.analyse_single_target(
        settings=settings, data=data, target=1)
    for further_target in (2, 3):
        res_network.combine_results(nw.analyse_single_target(
            settings=settings, data=data, target=further_target))

    adj_mat = res_network.get_adjacency_matrix('max_te_lag', fdr=False)
    print(adj_mat)
    assert adj_mat[0, 1] == delay_1, 'Estimate for delay 1 is not correct.'
    assert adj_mat[0, 2] == delay_2, 'Estimate for delay 2 is not correct.'
    assert adj_mat[0, 3] == delay_3, 'Estimate for delay 3 is not correct.'

    for target in (1, 2, 3):
        est_te = res_network._single_target[target].omnibus_te
        assert np.isclose(est_te, expected_mi, atol=0.05), (
            'Estimated TE for target {0} is not correct. Expected: {1}, '
            'Actual results: {2}.'.format(target, expected_mi, est_te))
from idtxl.multivariate_te import MultivariateTE
from idtxl.data import Data

# Estimate multivariate TE for both processes of the Lorenz example data with
# the OpenCL estimator and pickle the results next to this script.
start_time = time.time()
# load simulated data from 2 coupled Lorenz systems 1->2, u = 45 ms
d = np.load(os.path.join(os.path.dirname(__file__),
                         'data/lorenz_2_exampledata.npy'))
data = Data()
data.set_data(d[:, :, 0:100], 'psr')
settings = {
    'cmi_estimator': 'OpenCLKraskovCMI',
    'max_lag_sources': 50,
    'min_lag_sources': 40,
    'max_lag_target': 30,
    'tau_sources': 1,
    'tau_target': 3,
    'n_perm_max_stat': 200,
    'n_perm_min_stat': 200,
    'n_perm_omnibus': 500,
    'n_perm_max_seq': 500,
}
lorenz_analysis = MultivariateTE()
res_0 = lorenz_analysis.analyse_single_target(settings, data, 0)
res_1 = lorenz_analysis.analyse_single_target(settings, data, 1)
runtime = time.time() - start_time
print("---- {0} minutes".format(runtime / 60))

# Join the path components explicitly: plain string formatting drops the
# separator between the script directory and 'data'.
path = os.path.join(os.path.dirname(__file__), 'data')
# Write each result inside a context manager so the file is closed again.
with open(os.path.join(path, 'test_lorenz_opencl_res_0'), 'wb') as res_file:
    pickle.dump(res_0, res_file)
with open(os.path.join(path, 'test_lorenz_opencl_res_1'), 'wb') as res_file:
    pickle.dump(res_1, res_file)
def test_multivariate_te_corr_gaussian(estimator=None):
    """Test multivariate TE estimation on correlated Gaussians.

    Run the multivariate TE algorithm on two sets of random Gaussian data with
    a given covariance. The second data set is shifted by one sample creating
    a source-target delay of one sample. This example is modeled after the
    JIDT demo 4 for transfer entropy. The resulting TE can be compared to the
    analytical result (but expect some error in the estimate).

    The simulated delay is 1 sample, i.e., the algorithm should find
    significant TE from sample (0, 1), a sample in process 0 with lag/delay 1.
    The final target sample should always be (1, 1), the mandatory sample at
    lag 1, because there is no memory in the process.

    Note:
        This test runs considerably faster than other system tests.
        This produces strange small values for non-coupled sources.  TODO

    Args:
        estimator : str | None
            value used for settings['cmi_estimator']; if None,
            'JidtKraskovCMI' is used
    """
    if estimator is None:
        estimator = 'JidtKraskovCMI'

    n = 1000
    cov = 0.4
    # The target is a weighted sum of the source and independent noise. The
    # source is drawn completely before the noise.
    source = [rn.normalvariate(0, 1) for _ in range(n)]
    noise = [rn.normalvariate(0, 1) for _ in range(n)]
    target = [cov * s + (1 - cov) * e for s, e in zip(source, noise)]

    # Cast everything to numpy so the idtxl estimator understands it.
    source = np.expand_dims(np.array(source), axis=1)
    target = np.expand_dims(np.array(target), axis=1)

    data = Data(normalise=True)
    # Shift source and target against each other by one sample.
    data.set_data(np.vstack((source[1:].T, target[:-1].T)), 'ps')
    settings = {
        'cmi_estimator': estimator,
        'max_lag_sources': 5,
        'min_lag_sources': 1,
        'max_lag_target': 5,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
    }
    random_analysis = MultivariateTE()
    results = random_analysis.analyse_single_target(settings, data, 1)

    # Assert that there are significant conditionals from the source for target
    # 1. For 500 repetitions I got mean errors of 0.02097686 and 0.01454073 for
    # examples 1 and 2 respectively. The maximum errors were 0.093841 and
    # 0.05833172 respectively. This inspired the following error boundaries.
    corr_expected = cov / (1 * np.sqrt(cov**2 + (1 - cov)**2))
    expected_res = calculate_mi(corr_expected)
    estimated_res = results.get_single_target(1, fdr=False).omnibus_te
    diff = np.abs(estimated_res - expected_res)
    print('Expected source sample: (0, 1)\nExpected target sample: (1, 1)')
    # The relative error is scaled by 100 because it is printed as a
    # percentage.
    print(('Estimated TE: {0:5.4f}, analytical result: {1:5.4f}, error: '
           '{2:2.2f} % ').format(
               estimated_res, expected_res, 100 * diff / expected_res))
    assert (diff < 0.1), ('Multivariate TE calculation for correlated '
                          'Gaussians failed (error larger 0.1: {0}, expected: '
                          '{1}, actual: {2}).'.format(
                              diff, expected_res, estimated_res))
data.data.shape # In[6]: # b) Initialise analysis object and define settings network_analysis = MultivariateTE() settings = { 'cmi_estimator': 'JidtGaussianCMI', 'max_lag_sources': 16, 'min_lag_sources': 1 } # c) Run analysis results = network_analysis.analyse_single_target(settings=settings, data=data, target=0, sources=[1, 3]) # In[7]: results.print_edge_list(weights='max_te_lag', fdr=False) plot_network(results=results, weights='max_te_lag', fdr=False) plt.show() # ### Bivariate TE / Granger causality # In[8]: # %load ./IDTxl/demos/demo_bivariate_te.py # Import classes from idtxl.bivariate_te import BivariateTE