Example #1
def multivariate_pid_4D(data, settings):
    src, trg = _parse_channels(settings, dim=4)
    dataEff = _shuffle_target(data, trg, settings)

    dataIDTxl = Data(dataEff, dim_order='rps', normalise=False)
    pid = MultivariatePID()

    rez = pid.analyse_single_target(settings=settings['settings_estimator'],
                                    data=dataIDTxl,
                                    target=trg,
                                    sources=src)

    return np.array([
        rez.get_single_target(trg)['avg'][k][2]
        for k in multivariate_pid_key(dim=4)
    ])
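
For readers without the project-specific helpers (_parse_channels, _shuffle_target, multivariate_pid_key), the underlying IDTxl call reduces to the minimal sketch below; the 'SxPID' estimator name, the zero lags, and the toy binary data are illustrative assumptions, not part of the original function.

# Minimal, self-contained sketch of the direct MultivariatePID call that
# multivariate_pid_4D() wraps. The 'SxPID' estimator and zero lags are
# assumptions; the project helpers are not shown here.
import numpy as np
from idtxl.data import Data
from idtxl.multivariate_pid import MultivariatePID

rng = np.random.default_rng(0)
arr = rng.integers(0, 2, size=(4, 1000))      # 4 discrete processes
data = Data(arr, dim_order='ps', normalise=False)
settings = {'pid_estimator': 'SxPID', 'lags_pid': [0, 0, 0]}
pid = MultivariatePID()
res = pid.analyse_single_target(settings=settings, data=data,
                                target=3, sources=[0, 1, 2])
print(res.get_single_target(3)['avg'])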
Example #2
def test_single_source_storage_gaussian():
    n = 1000
    np.random.seed(SEED)
    proc_1 = np.random.normal(0, 1, size=n)
    proc_2 = np.random.normal(0, 1, size=n)
    # Stack both processes into an IDTxl Data object.
    data = Data(np.array([proc_1, proc_2]), dim_order='ps')
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'alpha_mi': 0.05,
        'tail_mi': 'one_bigger',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_mi': 21,
        'max_lag': 5,
        'tau': 1
    }
    processes = [1]
    network_analysis = ActiveInformationStorage()
    results = network_analysis.analyse_network(settings, data, processes)
    print('AIS for random normal data without memory (expected is NaN): '
          '{0}'.format(results._single_process[1].ais))
    assert results._single_process[1].ais is np.nan, (
        'Estimator did not return nan for memoryless data.')
Example #3
def idtxlParallelCPU(data, settings, NCore=None):
    # Get number of processes
    idxProcesses = settings['dim_order'].index("p")
    NProcesses = data.shape[idxProcesses]

    # Convert data to IDTxl format
    dataIDTxl = Data(data, dim_order=settings['dim_order'])

    # Initialise analysis object
    analysis_class = getAnalysisClass(settings['method'])

    # Initialize multiprocessing pool
    if NCore is None:
        NCore = pathos.multiprocessing.cpu_count() - 1
    pool = pathos.multiprocessing.ProcessingPool(NCore)
    targetLst = list(range(NProcesses))
    parallelTask = lambda trg: analysis_class.analyse_single_target(
        settings=settings, data=dataIDTxl, target=trg)
    rez = pool.map(parallelTask, targetLst)
    return rez
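
A hypothetical usage sketch of idtxlParallelCPU(); the 'method' key and the getAnalysisClass() mapping are project conventions assumed here, while the remaining keys are standard IDTxl analysis settings.

# Hypothetical usage: analyse every process of a (process x sample) array
# as a target in parallel. 'dim_order' and 'method' are consumed by the
# wrapper; getAnalysisClass() is a project helper assumed above.
import numpy as np

data = np.random.randn(5, 1000)               # 5 processes, 1000 samples
settings = {
    'dim_order': 'ps',
    'method': 'MultivariateTE',               # mapped by getAnalysisClass()
    'cmi_estimator': 'JidtKraskovCMI',
    'max_lag_sources': 3,
    'min_lag_sources': 1,
    'max_lag_target': 3,
}
results = idtxlParallelCPU(data, settings, NCore=4)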
Example #4
def test_max_statistic_sequential():
    dat = Data()
    dat.generate_mute_data(104, 10)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
        'max_lag_sources': 5,
        'min_lag_sources': 1,
        'max_lag_target': 5
    }
    setup = MultivariateTE()
    setup._initialise(settings, dat, sources=[0, 1], target=2)
    setup.current_value = (0, 4)
    setup.selected_vars_sources = [(1, 1), (1, 2)]
    setup.selected_vars_full = [(0, 1), (1, 1), (1, 2)]
    setup._selected_vars_realisations = np.random.rand(
        dat.n_realisations(setup.current_value), len(setup.selected_vars_full))
    setup._current_value_realisations = np.random.rand(
        dat.n_realisations(setup.current_value), 1)
    [sign, p, te] = stats.max_statistic_sequential(analysis_setup=setup,
                                                   data=dat)
Example #5
def test_multivariate_te_multiple_runs():
    """Test TE estimation using multiple runs on the GPU.

    Test if data is correctly split over multiple runs, if the problem size
    exceeds the GPU global memory and thus requires multiple runs. Using a
    number of permutations of 7000 requires two runs on a GPU with global
    memory of about 6 GB.
    """
    dat = Data()
    dat.generate_mute_data(n_samples=1000, n_replications=10)
    settings = {
        'cmi_estimator': 'OpenCLKraskovCMI',
        'max_lag_sources': 3,
        'min_lag_sources': 1,
        'max_lag_target': 3,
        'n_perm_max_stat': 7000,
        'n_perm_min_stat': 7000,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21
    }  # n_perm_max_stat should equal n_perm_min_stat because we reuse the
    # surrogate table from the min stats

    network_analysis = MultivariateTE()
    network_analysis.analyse_network(settings=settings, data=dat,
                                     targets=[1, 2])
Example #6
def test_compare_jidt_open_cl_estimator():
    """Compare results from OpenCl and JIDT estimators for AIS calculation."""
    dat = Data()
    dat.generate_mute_data(1000, 2)
    settings = {
        'cmi_estimator': 'OpenCLKraskovCMI',
        'n_perm_mi': 22,
        'alpha_mi': 0.05,
        'tail_mi': 'one_bigger',
        'n_perm_max_stat': 21,
        'max_lag': 5,
        'tau': 1
        }
    processes = [2, 3]
    network_analysis = ActiveInformationStorage()
    res_opencl = network_analysis.analyse_network(settings, dat, processes)
    settings['cmi_estimator'] = 'JidtKraskovCMI'
    res_jidt = network_analysis.analyse_network(settings, dat, processes)
    # Note that results are only required to agree within a tolerance of
    # 0.05. Results become more exact for bigger data sizes, but this takes
    # too long for a unit test.
    print('AIS for MUTE data proc 2 - opencl: {0} and jidt: {1}'.format(
                                    res_opencl[2]['ais'], res_jidt[2]['ais']))
    print('AIS for MUTE data proc 3 - opencl: {0} and jidt: {1}'.format(
                                    res_opencl[3]['ais'], res_jidt[3]['ais']))
    if not (res_opencl[2]['ais'] is np.nan or res_jidt[2]['ais'] is np.nan):
        assert abs(res_opencl[2]['ais'] - res_jidt[2]['ais']) < 0.05, (
            'AIS results differ between OpenCL and JIDT estimator.')
    else:
        assert res_opencl[2]['ais'] is res_jidt[2]['ais'], (
            'AIS results differ between OpenCL and JIDT estimator.')
    if not (res_opencl[3]['ais'] is np.nan or res_jidt[3]['ais'] is np.nan):
        assert abs(res_opencl[3]['ais'] - res_jidt[3]['ais']) < 0.05, (
            'AIS results differ between OpenCL and JIDT estimator.')
    else:
        assert res_opencl[3]['ais'] is res_jidt[3]['ais'], (
            'AIS results differ between OpenCL and JIDT estimator.')
Example #7
def test_calculate_mean():
    """Test if mean over CMI estimates is calculated correctly."""
    dat = Data()
    dat.generate_mute_data(100, 5)
    res_0 = np.load(
        os.path.join(os.path.dirname(__file__), 'data/mute_res_0.pkl'),
        allow_pickle=True)
    comparison_opts = {
        'cmi_calc_name': 'jidt_kraskov',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'tail': 'two',
        'n_perm_comp': 6,
        'alpha_comp': 0.2,
        'stats_type': 'dependent'
    }
    comp = Network_comparison(comparison_opts)
    comp._create_union(res_0)
    cmi = comp._calculate_cmi(dat)
    cmi_mean = comp._calculate_mean([cmi, cmi])
    for t in comp.union['targets']:
        assert (cmi_mean[t] == cmi[t]).all(), ('Error in mean of CMI for '
                                               'target {0}'.format(t))
Example #8
def test_add_conditional_manually():
    """Enforce the conditioning on additional variables."""
    settings = {'cmi_estimator': 'JidtKraskovCMI',
                'max_lag_sources': 5,
                'min_lag_sources': 3}
    nw = BivariateMI()
    data = Data()
    data.generate_mute_data()

    # Add a conditional with a lag bigger than the max_lag requested above
    settings['add_conditionals'] = (8, 0)
    with pytest.raises(IndexError):
        nw.analyse_single_target(settings=settings, data=data, target=0)

    # Add valid conditionals and test if they were added
    settings['add_conditionals'] = [(0, 1), (1, 3)]
    nw._initialise(settings=settings, data=data, target=0, sources=[1, 2])
    # Get list of conditionals after initialisation and convert absolute
    # samples back to lags for comparison.
    cond_list = nw._idx_to_lag(nw.selected_vars_full)
    assert settings['add_conditionals'][0] in cond_list, (
        'First enforced conditional is missing from results.')
    assert settings['add_conditionals'][1] in cond_list, (
        'Second enforced conditional is missing from results.')
Example #9
def infer_network(network_inference,
                  time_series,
                  parallel_target_analysis=False):
    # Define parameter options dictionaries
    network_inference_algorithms = pd.DataFrame()
    network_inference_algorithms['Description'] = pd.Series({
        'bMI_greedy':
        'Bivariate Mutual Information via greedy algorithm',
        'bTE_greedy':
        'Bivariate Transfer Entropy via greedy algorithm',
        'mMI_greedy':
        'Multivariate Mutual Information via greedy algorithm',
        'mTE_greedy':
        'Multivariate Transfer Entropy via greedy algorithm',
        'cross_corr':
        'Cross-correlation thresholding algorithm'
    })
    network_inference_algorithms['Required parameters'] = pd.Series({
        'bMI_greedy': [
            'min_lag_sources', 'max_lag_sources', 'tau_sources', 'tau_target',
            'cmi_estimator', 'z_standardise', 'permute_in_time',
            'n_perm_max_stat', 'n_perm_min_stat', 'n_perm_omnibus',
            'n_perm_max_seq', 'fdr_correction', 'p_value'
            # 'alpha_max_stats',
            # 'alpha_min_stats',
            # 'alpha_omnibus',
            # 'alpha_max_seq',
            # 'alpha_fdr'
        ],
        'bTE_greedy': [
            'min_lag_sources', 'max_lag_sources', 'tau_sources',
            'max_lag_target', 'tau_target', 'cmi_estimator', 'z_standardise',
            'permute_in_time', 'n_perm_max_stat', 'n_perm_min_stat',
            'n_perm_omnibus', 'n_perm_max_seq', 'fdr_correction', 'p_value'
            # 'alpha_max_stats',
            # 'alpha_min_stats',
            # 'alpha_omnibus',
            # 'alpha_max_seq',
            # 'alpha_fdr'
        ],
        'mMI_greedy': [
            'min_lag_sources', 'max_lag_sources', 'tau_sources', 'tau_target',
            'cmi_estimator', 'z_standardise', 'permute_in_time',
            'n_perm_max_stat', 'n_perm_min_stat', 'n_perm_omnibus',
            'n_perm_max_seq', 'fdr_correction', 'p_value'
            # 'alpha_max_stats',
            # 'alpha_min_stats',
            # 'alpha_omnibus',
            # 'alpha_max_seq',
            # 'alpha_fdr'
        ],
        'mTE_greedy': [
            'min_lag_sources', 'max_lag_sources', 'tau_sources',
            'max_lag_target', 'tau_target', 'cmi_estimator', 'z_standardise',
            'permute_in_time', 'n_perm_max_stat', 'n_perm_min_stat',
            'n_perm_omnibus', 'n_perm_max_seq', 'fdr_correction', 'p_value'
            # 'alpha_max_stats',
            # 'alpha_min_stats',
            # 'alpha_omnibus',
            # 'alpha_max_seq',
            # 'alpha_fdr'
        ],
        'cross_corr': ['min_lag_sources', 'max_lag_sources']
    })
    try:
        # Ensure that a network inference algorithm has been specified
        if 'algorithm' not in network_inference:
            raise ParameterMissing('algorithm')
        # Ensure that the provided algorithm is implemented
        if network_inference.algorithm not in network_inference_algorithms.index:
            raise ParameterValue(network_inference.algorithm)
        # Ensure that all the parameters required by the algorithm have been provided
        par_required = network_inference_algorithms['Required parameters'][
            network_inference.algorithm]
        for par in par_required:
            if par not in network_inference:
                raise ParameterMissing(par)

    except ParameterMissing as e:
        print(e.msg, e.par_names)
        raise
    except ParameterValue as e:
        print(e.msg, e.par_value)
        raise

    else:
        nodes_n = np.shape(time_series)[0]

        can_be_z_standardised = True
        if network_inference.z_standardise:
            # Check if data can be normalised per process (assuming the
            # first dimension represents processes, as in the rest of the code)
            can_be_z_standardised = np.all(np.std(time_series, axis=1) > 0)
            if not can_be_z_standardised:
                print('Time series cannot be z-standardised')

        if len(time_series.shape) == 2:
            dim_order = 'ps'
        else:
            dim_order = 'psr'

        # Load time series into an IDTxl Data object
        dat = Data(time_series,
                   dim_order=dim_order,
                   normalise=(network_inference.z_standardise
                              and can_be_z_standardised))

        algorithm = network_inference.algorithm
        if algorithm in [
                'bMI_greedy', 'mMI_greedy', 'bTE_greedy', 'mTE_greedy'
        ]:
            # Set analysis options
            if algorithm == 'bMI_greedy':
                network_analysis = BivariateMI()
            if algorithm == 'mMI_greedy':
                network_analysis = MultivariateMI()
            if algorithm == 'bTE_greedy':
                network_analysis = BivariateTE()
            if algorithm == 'mTE_greedy':
                network_analysis = MultivariateTE()

            settings = {
                'min_lag_sources': network_inference.min_lag_sources,
                'max_lag_sources': network_inference.max_lag_sources,
                'tau_sources': network_inference.tau_sources,
                'max_lag_target': network_inference.max_lag_target,
                'tau_target': network_inference.tau_target,
                'cmi_estimator': network_inference.cmi_estimator,
                'kraskov_k': network_inference.kraskov_k,
                'num_threads': network_inference.jidt_threads_n,
                'permute_in_time': network_inference.permute_in_time,
                'n_perm_max_stat': network_inference.n_perm_max_stat,
                'n_perm_min_stat': network_inference.n_perm_min_stat,
                'n_perm_omnibus': network_inference.n_perm_omnibus,
                'n_perm_max_seq': network_inference.n_perm_max_seq,
                'fdr_correction': network_inference.fdr_correction,
                'alpha_max_stat': network_inference.p_value,
                'alpha_min_stat': network_inference.p_value,
                'alpha_omnibus': network_inference.p_value,
                'alpha_max_seq': network_inference.p_value,
                'alpha_fdr': network_inference.p_value
            }

            if parallel_target_analysis:
                # Use SCOOP to create a generator of map results, each
                # corresponding to one map iteration
                res_iterator = futures.map_as_completed(
                    network_analysis.analyse_single_target,
                    itertools.repeat(settings, nodes_n),
                    itertools.repeat(dat, nodes_n), list(range(nodes_n)))
                # Run analysis
                res_list = list(res_iterator)
                if settings['fdr_correction']:
                    res = network_fdr({'alpha_fdr': settings['alpha_fdr']},
                                      *res_list)
                else:
                    res = res_list[0]
                    res.combine_results(*res_list[1:])
            else:
                # Run analysis
                res = network_analysis.analyse_network(settings=settings,
                                                       data=dat)
            return res

        else:
            raise ParameterValue(
                algorithm,
                msg='Network inference algorithm not yet implemented')
Example #10
def test_return_local_values():
    """Test estimation of local values."""
    max_lag = 5
    data = Data()
    data.generate_mute_data(500, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'local_values': True,  # request calculation of local values
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': max_lag,
        'min_lag_sources': 4,
        'max_lag_target': max_lag
    }
    target = 1
    mi = MultivariateMI()
    results = mi.analyse_network(settings, data, targets=[target])

    # Test if any sources were inferred. If not, return (this may happen
    # sometimes due to too few samples; however, a higher no. of samples is
    # not feasible for a unit test).
    if results.get_single_target(target, fdr=False)['mi'] is None:
        return

    lmi = results.get_single_target(target, fdr=False)['mi']
    n_sources = len(results.get_target_sources(target, fdr=False))
    assert type(lmi) is np.ndarray, (
        'LMI estimation did not return an array of values: {0}'.format(lmi))
    assert lmi.shape[0] == n_sources, (
        'Wrong dim (no. sources) in LMI estimate: {0}'.format(lmi.shape))
    assert lmi.shape[1] == data.n_realisations_samples(
        (0, max_lag)), ('Wrong dim (no. samples) in LMI estimate: {0}'.format(
            lmi.shape))
    assert lmi.shape[2] == data.n_replications, (
        'Wrong dim (no. replications) in LMI estimate: {0}'.format(lmi.shape))

    # Test for correctness of single link MI estimation by comparing it to
    # the omnibus MI. In this case (single source), the two should be the
    # same. Skip assertion if more than one source was inferred (this happens
    # sometimes due to random data and the low no. of permutations used for
    # statistical testing in unit tests).
    settings['local_values'] = False
    results_avg = mi.analyse_network(settings, data, targets=[target])
    if results_avg.get_single_target(target, fdr=False)['mi'] is None:
        return
    mi_single_link = results_avg.get_single_target(target, fdr=False)['mi']
    mi_omnibus = results_avg.get_single_target(target, fdr=False)['omnibus_mi']
    sources_local = results.get_target_sources(target, fdr=False)
    sources_avg = results_avg.get_target_sources(target, fdr=False)
    if len(sources_avg) == 1:
        print('Compare single link and omnibus MI.')
        assert np.isclose(mi_single_link, mi_omnibus, rtol=0.00005), (
            'Single link MI ({0:.6f}) is not equal to omnibus information '
            '({1:.6f}).'.format(mi_single_link[0], mi_omnibus))
    # Check if the average MI and the mean over local values are the same.
    # Test each source separately. Inferred sources may differ between the
    # two calls to analyse_network() due to the low number of surrogates used
    # in unit testing.
    for s in list(set(sources_avg).intersection(sources_local)):
        print('Compare average and local values.')
        i1 = np.where(sources_avg == s)[0][0]
        i2 = np.where(sources_local == s)[0][0]
        assert np.isclose(
            mi_single_link[i1], np.mean(lmi[i2, :, :]), rtol=0.00005), (
                'Single link average MI ({0:.6f}) and mean LMI ({1:.6f}) '
                'deviate.'.format(mi_single_link[i1], np.mean(lmi[i2, :, :])))
Example #11
def test_multivariate_te_corr_gaussian(estimator=None):
    """Test multivariate TE estimation on correlated Gaussians.

    Run the multivariate TE algorithm on two sets of random Gaussian data with
    a given covariance. The second data set is shifted by one sample creating
    a source-target delay of one sample. This example is modeled after the
    JIDT demo 4 for transfer entropy. The resulting TE can be compared to the
    analytical result (but expect some error in the estimate).

    The simulated delay is 1 sample, i.e., the algorithm should find
    significant TE from sample (0, 1), a sample in process 0 with lag/delay 1.
    The final target sample should always be (1, 1), the mandatory sample at
    lag 1, because there is no memory in the process.

    Note:
        This test runs considerably faster than other system tests.
        This produces strangely small values for non-coupled sources.  TODO
    """
    if estimator is None:
        estimator = 'JidtKraskovCMI'

    n = 1000
    cov = 0.4
    source = [rn.normalvariate(0, 1) for r in range(n)]
    target = [
        sum(pair)
        for pair in zip([cov * y for y in source],
                        [(1 - cov) * y
                         for y in [rn.normalvariate(0, 1) for r in range(n)]])
    ]
    # Cast everything to numpy so the idtxl estimator understands it.
    source = np.expand_dims(np.array(source), axis=1)
    target = np.expand_dims(np.array(target), axis=1)

    data = Data(normalise=True)
    data.set_data(np.vstack((source[1:].T, target[:-1].T)), 'ps')
    settings = {
        'cmi_estimator': estimator,
        'max_lag_sources': 5,
        'min_lag_sources': 1,
        'max_lag_target': 5,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
    }
    random_analysis = MultivariateTE()
    results = random_analysis.analyse_single_target(settings, data, 1)

    # Assert that there are significant conditionals from the source for target
    # 1. For 500 repetitions I got mean errors of 0.02097686 and 0.01454073 for
    # examples 1 and 2 respectively. The maximum errors were 0.093841 and
    # 0.05833172 respectively. This inspired the following error boundaries.
    corr_expected = cov / (1 * np.sqrt(cov**2 + (1 - cov)**2))
    expected_res = calculate_mi(corr_expected)
    estimated_res = results.get_single_target(1, fdr=False).omnibus_te
    diff = np.abs(estimated_res - expected_res)
    print('Expected source sample: (0, 1)\nExpected target sample: (1, 1)')
    print(('Estimated TE: {0:5.4f}, analytical result: {1:5.4f}, error: '
           '{2:2.2f} %').format(estimated_res, expected_res,
                                100 * diff / expected_res))
    assert (diff < 0.1), ('Multivariate TE calculation for correlated '
                          'Gaussians failed (error larger 0.1: {0}, expected: '
                          '{1}, actual: {2}).'.format(diff, expected_res,
                                                      estimated_res))
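
The analytical benchmark follows from the construction of the target: with target = cov*source + (1-cov)*noise, the source-target correlation is rho = cov/sqrt(cov**2 + (1-cov)**2), and for jointly Gaussian variables the mutual information is -0.5*ln(1 - rho**2) nats. A sketch of that computation, assuming this is what the calculate_mi() helper (not shown above) implements:

# Sketch of the analytical expectation, assuming calculate_mi() implements
# the Gaussian MI formula I = -0.5 * ln(1 - rho**2); the helper itself is
# not shown in the snippet above.
import numpy as np

cov = 0.4
rho = cov / np.sqrt(cov**2 + (1 - cov)**2)   # source-target correlation
expected_te = -0.5 * np.log(1 - rho**2)      # MI in nats for Gaussians
print('Expected TE/MI: {0:.4f} nats'.format(expected_te))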
Example #12
def test_pid_xor_data():
    """Test basic calls to PID class."""
    n = 100
    alph = 2
    x = np.random.randint(0, alph, n)
    y = np.random.randint(0, alph, n)
    z = np.logical_xor(x, y).astype(int)
    data = Data(np.vstack((x, y, z)), 'ps', normalise=False)

    # Run Tartu estimator
    settings = {'pid_estimator': 'TartuPID', 'lags_pid': [0, 0]}
    pid = BivariatePID()
    tic = tm.time()
    est_tartu = pid.analyse_single_target(settings, data=data, target=2,
                                          sources=[0, 1])
    t_tartu = tm.time() - tic

    # Run Sydney estimator
    settings = {
        'n_perm': 11,
        'alpha': 0.1,
        'alph_s1': alph,
        'alph_s2': alph,
        'alph_t': alph,
        'max_unsuc_swaps_row_parm': 60,
        'num_reps': 63,
        'max_iters': 1000,
        'pid_estimator': 'SydneyPID',
        'lags_pid': [0, 0]}
    pid = BivariatePID()
    tic = tm.time()
    est_sydney = pid.analyse_single_target(settings, data=data, target=2,
                                           sources=[0, 1])
    t_sydney = tm.time() - tic

    print('\nResults Tartu estimator:')
    utils.print_dict(est_tartu.get_single_target(2))
    print('\nResults Sydney estimator:')
    utils.print_dict(est_sydney.get_single_target(2))

    print('\nLogical XOR')
    print('Estimator            Sydney\t\tTartu\n')
    print('PID evaluation       {:.3f} s\t\t{:.3f} s\n'.format(t_sydney,
                                                               t_tartu))
    print('Uni s1               {0:.8f}\t\t{1:.8f}'.format(
        est_sydney._single_target[2]['unq_s1'],
        est_tartu._single_target[2]['unq_s1']))
    print('Uni s2               {0:.8f}\t\t{1:.8f}'.format(
        est_sydney._single_target[2]['unq_s2'],
        est_tartu._single_target[2]['unq_s2']))
    print('Shared s1_s2         {0:.8f}\t\t{1:.8f}'.format(
        est_sydney._single_target[2]['shd_s1_s2'],
        est_tartu._single_target[2]['shd_s1_s2']))
    print('Synergy s1_s2        {0:.8f}\t\t{1:.8f}'.format(
        est_sydney._single_target[2]['syn_s1_s2'],
        est_tartu._single_target[2]['syn_s1_s2']))
    assert 0.9 < est_sydney._single_target[2]['syn_s1_s2'] <= 1.1, (
            'Sydney estimator incorrect synergy: {0}, should approx. 1'.format(
                est_sydney._single_target[2]['syn_s1_s2']))
    assert 0.9 < est_tartu._single_target[2]['syn_s1_s2'] <= 1.1, (
            'Tartu estimator incorrect synergy: {0}, should approx. 1'.format(
                est_tartu._single_target[2]['syn_s1_s2']))
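
The expected synergy of roughly 1 bit follows from the XOR truth table: each source alone is independent of the target, while both sources jointly determine it, so the full joint mutual information of 1 bit is purely synergistic. A plug-in sketch verifying this directly (illustrative only, not part of the test above):

# Plug-in check that XOR carries ~1 bit jointly and ~0 bits per single
# source. Illustrative sketch, not part of the original test.
import numpy as np

def plugin_mi(a, b):
    """Discrete plug-in mutual information in bits."""
    mi = 0.0
    for va in np.unique(a):
        for vb in np.unique(b):
            p_ab = np.mean((a == va) & (b == vb))
            p_a = np.mean(a == va)
            p_b = np.mean(b == vb)
            if p_ab > 0:
                mi += p_ab * np.log2(p_ab / (p_a * p_b))
    return mi

x = np.random.randint(0, 2, 10000)
y = np.random.randint(0, 2, 10000)
z = np.logical_xor(x, y).astype(int)
print(plugin_mi(x, z))          # ~0 bits
print(plugin_mi(y, z))          # ~0 bits
print(plugin_mi(x + 2 * y, z))  # ~1 bit: joint sources vs. target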
Example #13
def test_return_local_values():
    """Test estimation of local values."""
    max_lag = 5
    data = Data()
    data.generate_mute_data(500, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'noise_level': 0,
        'local_values': True,  # request calculation of local values
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': max_lag,
        'min_lag_sources': 4,
        'max_lag_target': max_lag
    }
    target = 3
    sources = [0, 4]
    mi = MultivariateMI()
    results = mi.analyse_single_target(settings,
                                       data,
                                       target=target,
                                       sources=sources)
    settings['local_values'] = False
    results_avg = mi.analyse_single_target(settings,
                                           data,
                                           target=target,
                                           sources=sources)

    # Test if any sources were inferred. If not, return (this may happen
    # sometimes due to too few samples; however, a higher no. of samples is
    # not feasible for a unit test).
    if results.get_single_target(target, fdr=False)['mi'] is None:
        return
    if results_avg.get_single_target(target, fdr=False)['mi'] is None:
        return

    lmi = results.get_single_target(target, fdr=False)['mi']
    n_sources = len(results.get_target_sources(target, fdr=False))
    assert type(lmi) is np.ndarray, (
        'LMI estimation did not return an array of values: {0}'.format(lmi))
    assert lmi.shape[0] == n_sources, (
        'Wrong dim (no. sources) in LMI estimate: {0}'.format(lmi.shape))
    assert lmi.shape[1] == data.n_realisations_samples(
        (0, max_lag)), ('Wrong dim (no. samples) in LMI estimate: {0}'.format(
            lmi.shape))
    assert lmi.shape[2] == data.n_replications, (
        'Wrong dim (no. replications) in LMI estimate: {0}'.format(lmi.shape))

    # Check if the average MI and the mean over local values are the same.
    # Test each source separately. Inferred sources and variables may differ
    # between the two calls to analyse_single_target() due to the low number
    # of surrogates used in unit testing.
    mi_single_link = results_avg.get_single_target(target, fdr=False)['mi']
    sources_local = results.get_target_sources(target, fdr=False)
    sources_avg = results_avg.get_target_sources(target, fdr=False)
    for s in list(set(sources_avg).intersection(sources_local)):
        i1 = np.where(sources_avg == s)[0][0]
        i2 = np.where(sources_local == s)[0][0]
        # Skip comparison if inferred variables differ between links.
        vars_local = [
            v for v in results.get_single_target(
                target, fdr=False).selected_vars_sources if v[0] == s
        ]
        vars_avg = [
            v for v in results_avg.get_single_target(
                target, fdr=False).selected_vars_sources if v[0] == s
        ]
        if vars_local != vars_avg:
            continue
        print('Compare average ({0:.4f}) and local values ({1:.4f}).'.format(
            mi_single_link[i1], np.mean(lmi[i2, :, :])))
        assert np.isclose(
            mi_single_link[i1], np.mean(lmi[i2, :, :]), rtol=0.00005), (
                'Single link average MI ({0:.6f}) and mean LMI ({1:.6f}) '
                'deviate.'.format(mi_single_link[i1], np.mean(lmi[i2, :, :])))
Example #14
def test_assertions():
    """Test if input checks raise errors."""
    data = Data()
    data.generate_mute_data(100, 5)

    # Load previously generated example data
    path = os.path.join(os.path.dirname(__file__), 'data/')
    res_0 = pickle.load(open(path + 'mute_results_0.p', 'rb'))
    res_1 = pickle.load(open(path + 'mute_results_1.p', 'rb'))
    res_2 = pickle.load(open(path + 'mute_results_2.p', 'rb'))
    res_3 = pickle.load(open(path + 'mute_results_3.p', 'rb'))

    # comparison settings
    comp_settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'tail': 'two'
    }

    # no. permutations insufficient for requested alpha
    comp_settings['n_perm_comp'] = 6
    comp_settings['alpha_comp'] = 0.001
    comp_settings['stats_type'] = 'independent'
    comp = NetworkComparison()
    with pytest.raises(RuntimeError):
        comp._initialise(comp_settings)

    # data sets have unequal no. replications
    dat2 = Data()
    dat2.generate_mute_data(100, 3)
    comp_settings['stats_type'] = 'dependent'
    comp_settings['alpha_comp'] = 0.05
    comp_settings['n_perm_comp'] = 1000
    comp = NetworkComparison()
    with pytest.raises(AssertionError):
        comp.compare_within(comp_settings, res_0, res_1, data, dat2)

    # data sets have unequal no. realisations
    dat2 = Data()
    dat2.generate_mute_data(80, 5)
    comp_settings['stats_type'] = 'dependent'
    comp_settings['alpha_comp'] = 0.05
    comp_settings['n_perm_comp'] = 21
    comp = NetworkComparison()
    with pytest.raises(RuntimeError):
        comp.compare_within(comp_settings, res_0, res_1, data, dat2)

    # no. replications/subjects too small for dependent-samples test
    comp_settings['stats_type'] = 'dependent'
    comp_settings['n_perm_comp'] = 1000
    comp = NetworkComparison()
    with pytest.raises(RuntimeError):  # between
        comp.compare_between(comp_settings,
                             network_set_a=np.array((res_0, res_1)),
                             network_set_b=np.array((res_2, res_3)),
                             data_set_a=np.array((data, data)),
                             data_set_b=np.array((data, data)))
    with pytest.raises(RuntimeError):  # within
        comp.compare_within(comp_settings, res_0, res_1, dat2, dat2)

    # no. replications/subjects too small for independent-samples test
    comp_settings['stats_type'] = 'independent'
    comp = NetworkComparison()
    with pytest.raises(RuntimeError):  # between
        comp.compare_between(comp_settings,
                             network_set_a=np.array((res_0, res_1)),
                             network_set_b=np.array((res_2, res_3)),
                             data_set_a=np.array((data, data)),
                             data_set_b=np.array((data, data)))
    with pytest.raises(RuntimeError):  # within
        comp.compare_within(comp_settings, res_0, res_1, dat2, dat2)

    # add target to network that is not in the data object
    dat2 = Data(np.random.rand(2, 1000, 50), dim_order='psr')
    comp_settings['alpha_comp'] = 0.05
    comp_settings['n_perm_comp'] = 21
    comp = NetworkComparison()
    with pytest.raises(IndexError):
        comp.compare_within(comp_settings, res_0, res_2, dat2, dat2)
Example #15
def test_analyse_network():
    """Test method for full network analysis."""
    n_processes = 5  # the MuTE network has 5 nodes
    dat = Data()
    dat.generate_mute_data(10, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 30,
        'max_lag_sources': 5,
        'min_lag_sources': 4,
        'max_lag_target': 5
    }
    nw_0 = BivariateTE()

    # Test all to all analysis
    r = nw_0.analyse_network(settings, dat, targets='all', sources='all')
    try:
        del r['fdr_corrected']
    except KeyError:
        pass
    k = list(r.keys())
    sources = np.arange(n_processes)
    assert all(np.array(k) == np.arange(n_processes)), (
        'Network analysis did not run on all targets.')
    for t in r.keys():
        s = np.array(list(set(sources) - set([t])))
        assert all(np.array(r[t]['sources_tested']) == s), (
            'Network analysis did not run on all sources for target '
            '{0}'.format(t))
    # Test analysis for subset of targets
    target_list = [1, 2, 3]
    r = nw_0.analyse_network(settings, dat, targets=target_list, sources='all')
    try:
        del r['fdr_corrected']
    except KeyError:
        pass
    k = list(r.keys())
    assert all(np.array(k) == np.array(target_list)), (
        'Network analysis did not run on correct subset of targets.')
    for t in r.keys():
        s = np.array(list(set(sources) - set([t])))
        assert all(np.array(r[t]['sources_tested']) == s), (
            'Network analysis did not run on all sources for target '
            '{0}'.format(t))

    # Test analysis for subset of sources
    source_list = [1, 2, 3]
    target_list = [0, 4]
    r = nw_0.analyse_network(settings,
                             dat,
                             targets=target_list,
                             sources=source_list)
    try:
        del r['fdr_corrected']
    except KeyError:
        pass
    k = list(r.keys())
    assert all(np.array(k) == np.array(target_list)), (
        'Network analysis did not run for all targets.')
    for t in r.keys():
        assert all(r[t]['sources_tested'] == np.array(source_list)), (
            'Network analysis did not run on the correct subset of sources '
            'for target {0}'.format(t))
Example #16
def test_network_fdr():
    settings = {'n_perm_max_seq': 1000, 'n_perm_omnibus': 1000}
    target_0 = {
        'selected_vars_sources': [(1, 1), (1, 2), (1, 3), (2, 1), (2, 0)],
        'selected_vars_full': [(0, 1), (0, 2), (0, 3), (1, 1), (1, 2), (1, 3),
                               (2, 1), (2, 0)],
        'omnibus_pval': 0.0001,
        'omnibus_sign': True,
        'selected_sources_pval': np.array([0.001, 0.0014, 0.01, 0.045, 0.047]),
        'selected_sources_te': np.array([1.1, 1.0, 0.8, 0.7, 0.63]),
        }
    target_1 = {
        'selected_vars_sources': [(1, 2), (2, 1), (2, 2)],
        'selected_vars_full': [(1, 0), (1, 1), (1, 2), (2, 1), (2, 2)],
        'omnibus_pval': 0.031,
        'omnibus_sign': True,
        'selected_sources_pval': np.array([0.00001, 0.00014, 0.01]),
        'selected_sources_te': np.array([1.8, 1.75, 0.75]),
        }
    target_2 = {
        'selected_vars_sources': [],
        'selected_vars_full': [(2, 0), (2, 1)],
        'omnibus_pval': 0.41,
        'omnibus_sign': False,
        'selected_sources_pval': None,
        'selected_sources_te': np.array([]),
        }
    res_1 = ResultsNetworkInference(
        n_nodes=3, n_realisations=1000, normalised=True)
    res_1._add_single_result(target=0, settings=settings, results=target_0)
    res_1._add_single_result(target=1, settings=settings, results=target_1)
    res_2 = ResultsNetworkInference(
        n_nodes=3, n_realisations=1000, normalised=True)
    res_2._add_single_result(target=2, settings=settings, results=target_2)

    for correct_by_target in [True, False]:
        settings = {
            'cmi_estimator': 'JidtKraskovCMI',
            'alpha_fdr': 0.05,
            'max_lag_sources': 3,
            'min_lag_sources': 1,
            'max_lag_target': 3,
            'correct_by_target': correct_by_target}
        data = Data()
        data.generate_mute_data(n_samples=100, n_replications=3)
        analysis_setup = MultivariateTE()
        analysis_setup._initialise(settings=settings, data=data,
                                   sources=[1, 2], target=0)
        res_pruned = stats.network_fdr(settings, res_1, res_2)
        assert (not res_pruned._single_target[2].selected_vars_sources), (
            'Target 2 has not been pruned from results.')

        for k in res_pruned.targets_analysed:
            if res_pruned._single_target[k]['selected_sources_pval'] is None:
                assert (
                    not res_pruned._single_target[k]['selected_vars_sources'])
            else:
                assert (
                    len(res_pruned._single_target[k]['selected_vars_sources']) ==
                    len(res_pruned._single_target[k]['selected_sources_pval'])), (
                        'Source list and list of p-values should have '
                        'the same length.')

    # Test function call for single result
    res_pruned = stats.network_fdr(settings, res_1)
    print('successful call on single result dict.')

    # Test None result for insufficient no. permutations, no FDR-corrected
    # results (the results class throws an error if no FDR-corrected results
    # exist).
    res_1.settings['n_perm_max_seq'] = 2
    res_2.settings['n_perm_max_seq'] = 2
    res_pruned = stats.network_fdr(settings, res_1, res_2)
    with pytest.raises(RuntimeError):
        res_pruned.get_adjacency_matrix('binary', fdr=True)
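
stats.network_fdr() applies a false-discovery-rate correction across the link p-values collected above. The Benjamini-Hochberg step-up rule underlying this kind of correction can be sketched as follows (an illustration of the statistical idea, not IDTxl's exact implementation):

# Benjamini-Hochberg step-up rule: reject the k smallest p-values, where k
# is the largest index with p_(k) <= (k / m) * alpha. Illustrative only.
import numpy as np

def fdr_reject(pvals, alpha=0.05):
    pvals = np.asarray(pvals)
    m = len(pvals)
    order = np.argsort(pvals)
    thresh = alpha * (np.arange(1, m + 1) / m)
    below = pvals[order] <= thresh
    reject = np.zeros(m, dtype=bool)
    if below.any():
        k = np.max(np.nonzero(below)[0])     # largest index passing
        reject[order[:k + 1]] = True
    return reject

pvals = [0.001, 0.0014, 0.01, 0.045, 0.047]  # target 0's source p-values
print(fdr_reject(pvals, alpha=0.05))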
Example #17
def import_matarray(file_name, array_name, file_version, dim_order,
                    normalise=True):
    """Read Matlab hdf5 file into IDTxl.

    Reads a MATLAB hdf5 file ('-v7.3' or higher, .mat) with a SINGLE
    array inside and returns a numpy array with dimensions that
    are channel x time x trials, using np.swapaxes where necessary.

    Note:
        The import function squeezes the loaded mat-file, i.e., any singleton
        dimension will be removed. Hence do not enter singleton dimension into
        the 'dim_order', e.g., don't pass dim_order='ps' but dim_order='s' if
        you want to load a 1D-array where entries represent samples recorded
        from a single channel.

    Args:
        file_name : string
            full (matlab) file_name on disk
        array_name : string
            variable name of the MATLAB structure to be read
        file_version : string
            version of the file, e.g. 'v7.3' for MATLAB's 7.3 format;
            currently versions 'v4', 'v6', 'v7', and 'v7.3' are supported
        dim_order : string
            order of dimensions, accepts any combination of the characters
            'p', 's', and 'r' for processes, samples, and replications; must
            have the same length as the data dimensionality, e.g., 'ps' for a
            two-dimensional array of data from several processes over time
        normalise : bool [optional]
            normalise data after import (default=True)

    Returns:
        Data() instance
            instance of IDTxl Data object, containing data from the 'trial'
            field
        list of strings
            list of channel labels, corresponding to the 'label' field
        numpy array
            time stamps for samples, corresponding to one entry in the 'time'
            field
        int
            sampling rate, corresponding to the 'fsample' field

    Created on Wed Mar 19 12:34:36 2014

    @author: Michael Wibral
    """
    if file_version == 'v7.3':
        mat_file = h5py.File(file_name, 'r')
        # Check that the requested array name is a key at the top level of
        # the HDF5 file
        if array_name not in mat_file.keys():
            raise RuntimeError('Array {0} not in mat file or not a variable '
                               'at the file\'s top level.'.format(array_name))

        # Create an object for the matlab array (from the hdf5 hierarchy);
        # the trailing [()] ensures everything is read
        mat_data = np.squeeze(np.asarray(mat_file[array_name][()]))

    elif file_version in ['v4', 'v6', 'v7']:
        try:
            m = loadmat(file_name, squeeze_me=True, variable_names=array_name)
        except NotImplementedError as err:
            raise RuntimeError('You may have provided an incorrect file '
                               'version. The mat file was probably saved as '
                               'version 7.3 (hdf5).') from err
        mat_data = m[array_name]  # loadmat returns a dict containing variables
    else:
        raise ValueError('Unknown file version: {0}.'.format(file_version))

    # Create output: IDTxl data object, list of labels, sampling info in unit
    # time steps (sampling rate of 1).
    print('Creating Data object from matlab array: {0}.'.format(array_name))
    dat = Data(mat_data, dim_order=dim_order, normalise=normalise)
    label = []
    for n in range(dat.n_processes):
        label.append('channel_{0:03d}'.format(n))
    fsample = 1
    timestamps = np.arange(dat.n_samples)
    return dat, label, timestamps, fsample
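
A hypothetical usage sketch of import_matarray(); 'recording.mat' and 'lfp' are placeholder names for a file holding a single channel x time x trials array:

# Hypothetical usage of import_matarray(); the file and variable names are
# placeholders, not files shipped with the snippet above.
dat, labels, timestamps, fsample = import_matarray(
    file_name='recording.mat',
    array_name='lfp',
    file_version='v7.3',
    dim_order='psr',      # processes x samples x replications
    normalise=True)
print(dat.n_processes, dat.n_samples, dat.n_replications)
print(labels[0])          # 'channel_000'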
Example #18
# Import classes
from idtxl.active_information_storage import ActiveInformationStorage
from idtxl.data import Data

# a) Generate test data
data = Data()
data.generate_mute_data(n_samples=1000, n_replications=5)

# b) Initialise analysis object and define settings
network_analysis = ActiveInformationStorage()
settings = {'cmi_estimator': 'JidtGaussianCMI', 'max_lag': 5}

# c) Run analysis
results = network_analysis.analyse_network(settings=settings, data=data)

# d) Plot list of processes with significant AIS to console
print(results.get_significant_processes(fdr=False))
Example #19
def test_bivariate_te_init():
    """Test instance creation for BivariateTE class."""
    # Test error on missing estimator
    settings = {
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'n_perm_max_seq': 30,
        'max_lag_sources': 7,
        'min_lag_sources': 2,
        'max_lag_target': 5
    }
    nw = BivariateTE()
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=Data(), target=1)

    # Test setting of min and max lags
    settings['cmi_estimator'] = 'JidtKraskovCMI'
    dat = Data()
    dat.generate_mute_data(n_samples=10, n_replications=5)

    # Valid: max lag sources bigger than max lag target
    nw.analyse_single_target(settings=settings, data=dat, target=1)

    # Valid: max lag sources smaller than max lag target
    settings['max_lag_sources'] = 3
    nw.analyse_single_target(settings=settings, data=dat, target=1)

    # Invalid: min lag sources bigger than max lag
    settings['min_lag_sources'] = 8
    settings['max_lag_sources'] = 7
    settings['max_lag_target'] = 5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)

    # Invalid: taus bigger than lags
    settings['min_lag_sources'] = 2
    settings['max_lag_sources'] = 4
    settings['max_lag_target'] = 5
    settings['tau_sources'] = 10
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['tau_sources'] = 1
    settings['tau_target'] = 10
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)

    # Invalid: negative lags or taus
    settings['min_lag_sources'] = 1
    settings['max_lag_target'] = 5
    settings['max_lag_sources'] = -7
    settings['tau_target'] = 1
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['max_lag_sources'] = 7
    settings['min_lag_sources'] = -4
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['min_lag_sources'] = 4
    settings['max_lag_target'] = -1
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['max_lag_target'] = 5
    settings['tau_sources'] = -1
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['tau_sources'] = 1
    settings['tau_target'] = -1
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)

    # Invalid: lags or taus are not integers
    settings['tau_target'] = 1
    settings['min_lag_sources'] = 1.5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['min_lag_sources'] = 1
    settings['max_lag_sources'] = 1.5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['max_lag_sources'] = 7
    settings['tau_sources'] = 1.5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['tau_sources'] = 1
    settings['tau_target'] = 1.5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=dat, target=1)
    settings['tau_target'] = 1

    # Invalid: sources or target is not an int
    with pytest.raises(RuntimeError):  # not an int
        nw.analyse_single_target(settings=settings, data=dat, target=1.5)
    with pytest.raises(RuntimeError):  # negative
        nw.analyse_single_target(settings=settings, data=dat, target=-1)
    with pytest.raises(RuntimeError):  # not in data
        nw.analyse_single_target(settings=settings, data=dat, target=10)
    with pytest.raises(RuntimeError):  # wrong type
        nw.analyse_single_target(settings=settings, data=dat, target={})
    with pytest.raises(RuntimeError):  # negative
        nw.analyse_single_target(settings=settings,
                                 data=dat,
                                 target=0,
                                 sources=-1)
    with pytest.raises(RuntimeError):  # negative
        nw.analyse_single_target(settings=settings,
                                 data=dat,
                                 target=0,
                                 sources=[-1])
    with pytest.raises(RuntimeError):  # not in data
        nw.analyse_single_target(settings=settings,
                                 data=dat,
                                 target=0,
                                 sources=20)
    with pytest.raises(RuntimeError):  # not in data
        nw.analyse_single_target(settings=settings,
                                 data=dat,
                                 target=0,
                                 sources=[20])

    # Force conditionals
    settings['add_conditionals'] = [(0, 1), (1, 3)]
    nw.analyse_single_target(settings=settings, data=dat, target=0)
    settings['add_conditionals'] = (8, 0)
    with pytest.raises(IndexError):
        nw.analyse_single_target(settings=settings, data=dat, target=0)
Example #20
def test_get_permuted_replications():
    """Test if permutation of replications works."""
    # Load previously generated example data
    res_0 = np.load(os.path.join(os.path.dirname(__file__),
                    'data/mute_res_0.pkl'), allow_pickle=True)
    res_1 = np.load(os.path.join(os.path.dirname(__file__),
                    'data/mute_res_1.pkl'), allow_pickle=True)

    comp_settings = {
            'cmi_estimator': 'JidtKraskovCMI',
            'n_perm_max_stat': 50,
            'n_perm_min_stat': 50,
            'n_perm_omnibus': 200,
            'n_perm_max_seq': 50,
            'tail': 'two',
            'n_perm_comp': 6,
            'alpha_comp': 0.2,
            'stats_type': 'dependent'
            }
    comp = NetworkComparison()
    comp._initialise(comp_settings)
    comp._create_union(res_0, res_1)

    # Check permutation for dependent samples test: Replace realisations by
    # zeros and ones, check if realisations get swapped correctly.
    dat1 = Data()
    dat1.normalise = False
    dat1.set_data(np.zeros((5, 100, 5)), 'psr')
    dat2 = Data()
    dat2.normalise = False
    dat2.set_data(np.ones((5, 100, 5)), 'psr')
    [cond_a_perm,
     cv_a_perm,
     cond_b_perm,
     cv_b_perm] = comp._get_permuted_replications(data_a=dat1,
                                                  data_b=dat2,
                                                  target=1)
    n_vars = cond_a_perm.shape[1]
    assert (np.sum(cond_a_perm + cond_b_perm, axis=1) == n_vars).all(), (
                'Dependent samples permutation did not work correctly.')
    assert np.logical_xor(cond_a_perm, cond_b_perm).all(), (
                'Dependent samples permutation did not work correctly.')

    # Check permutations for independent samples test: Check the sum over
    # realisations.
    comp_settings['stats_type'] = 'independent'
    comp = NetworkComparison()
    comp._initialise(comp_settings)
    comp._create_union(res_0, res_1)
    [cond_a_perm,
     cv_a_perm,
     cond_b_perm,
     cv_b_perm] = comp._get_permuted_replications(data_a=dat1,
                                                  data_b=dat2,
                                                  target=1)
    n_samples = n_vars * dat1.n_realisations((0, comp.union['max_lag']))
    assert np.sum(cond_a_perm + cond_b_perm, axis=None) == n_samples, (
                'Independent samples permutation did not work correctly.')

    # test unequal number of replications
    dat2.generate_mute_data(100, 7)
    with pytest.raises(AssertionError):
        comp._get_permuted_replications(data_a=dat1, data_b=dat2, target=1)
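
The dependent-samples check above relies on whole replications being swapped intact between the two data sets. A toy sketch of such a swap (illustrative only, not the NetworkComparison internals):

# Toy sketch of a dependent-samples replication swap: a coin flip per
# replication decides whether conditions A and B exchange that replication.
import numpy as np

rng = np.random.default_rng(0)
a = np.zeros((5, 100, 5))          # processes x samples x replications
b = np.ones((5, 100, 5))
swap = rng.random(a.shape[2]) > 0.5
a_perm, b_perm = a.copy(), b.copy()
a_perm[:, :, swap], b_perm[:, :, swap] = b[:, :, swap], a[:, :, swap]
# Every (process, sample, replication) entry comes from exactly one condition:
assert np.logical_xor(a_perm, b_perm).all()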
Example #21
def test_network_comparison_use_cases():
    """Run all intended use cases, within/between, dependent/independent."""
    data = Data()
    data.generate_mute_data(100, 5)

    path = os.path.join(os.path.dirname(__file__), 'data/')
    res_0 = pickle.load(open(path + 'mute_results_0.p', 'rb'))
    res_1 = pickle.load(open(path + 'mute_results_1.p', 'rb'))
    res_2 = pickle.load(open(path + 'mute_results_2.p', 'rb'))
    res_3 = pickle.load(open(path + 'mute_results_3.p', 'rb'))
    res_4 = pickle.load(open(path + 'mute_results_4.p', 'rb'))

    # comparison settings
    comp_settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'alpha_comp': 0.26,
        'n_perm_comp': 4,
        'tail': 'two'
    }

    comp = NetworkComparison()

    print('\n\nTEST 0 - independent within')
    comp_settings['stats_type'] = 'independent'
    comp.compare_within(comp_settings, res_0, res_1, data, data)

    print('\n\nTEST 1 - dependent within')
    comp_settings['stats_type'] = 'dependent'
    comp.compare_within(comp_settings, res_0, res_1, data, data)

    print('\n\nTEST 2 - independent between')
    comp_settings['stats_type'] = 'independent'
    comp.compare_between(comp_settings,
                         network_set_a=np.array((res_0, res_1)),
                         network_set_b=np.array((res_2, res_3)),
                         data_set_a=np.array((data, data)),
                         data_set_b=np.array((data, data)))

    print('\n\nTEST 3 - dependent between')
    comp_settings['stats_type'] = 'dependent'
    comp.compare_between(comp_settings,
                         network_set_a=np.array((res_0, res_1)),
                         network_set_b=np.array((res_2, res_3)),
                         data_set_a=np.array((data, data)),
                         data_set_b=np.array((data, data)))

    print('\n\nTEST 4 - independent within unbalanced')
    comp_settings['stats_type'] = 'independent'
    comp = NetworkComparison()
    comp.compare_within(comp_settings, res_0, res_1, data, data)

    print('\n\nTEST 5 - independent between unbalanced')
    comp_settings['stats_type'] = 'independent'
    comp = NetworkComparison()
    comp.compare_between(comp_settings,
                         network_set_a=np.array((res_0, res_1)),
                         network_set_b=np.array((res_2, res_3, res_4)),
                         data_set_a=np.array((data, data)),
                         data_set_b=np.array((data, data, data)))
Example #22
def test_network_comparison_use_cases():
    """Run all intended use cases, within/between, dependent/independent."""
    dat = Data()
    dat.generate_mute_data(100, 5)

    # Load previously generated example data (pickled)
    res_0 = np.load(os.path.join(os.path.dirname(__file__),
                    'data/mute_res_0.pkl'), allow_pickle=True)
    res_1 = np.load(os.path.join(os.path.dirname(__file__),
                    'data/mute_res_1.pkl'), allow_pickle=True)
    res_2 = np.load(os.path.join(os.path.dirname(__file__),
                    'data/mute_res_2.pkl'), allow_pickle=True)
    res_3 = np.load(os.path.join(os.path.dirname(__file__),
                    'data/mute_res_3.pkl'), allow_pickle=True)
    res_4 = np.load(os.path.join(os.path.dirname(__file__),
                    'data/mute_res_4.pkl'), allow_pickle=True)

    # comparison settings
    comp_settings = {
            'cmi_estimator': 'JidtKraskovCMI',
            'n_perm_max_stat': 50,
            'n_perm_min_stat': 50,
            'n_perm_omnibus': 200,
            'n_perm_max_seq': 50,
            'alpha_comp': 0.26,
            'n_perm_comp': 4,
            'tail': 'two'
            }

    comp = NetworkComparison()

    print('\n\nTEST 0 - independent within')
    comp_settings['stats_type'] = 'independent'
    comp.compare_within(comp_settings, res_0, res_1, dat, dat)

    print('\n\nTEST 1 - dependent within')
    comp_settings['stats_type'] = 'dependent'
    comp.compare_within(comp_settings, res_0, res_1, dat, dat)

    print('\n\nTEST 2 - independent between')
    comp_settings['stats_type'] = 'independent'
    comp.compare_between(comp_settings,
                         network_set_a=np.array((res_0, res_1)),
                         network_set_b=np.array((res_2, res_3)),
                         data_set_a=np.array((dat, dat)),
                         data_set_b=np.array((dat, dat)))

    print('\n\nTEST 3 - dependent between')
    comp_settings['stats_type'] = 'dependent'
    comp.compare_between(comp_settings,
                         network_set_a=np.array((res_0, res_1)),
                         network_set_b=np.array((res_2, res_3)),
                         data_set_a=np.array((dat, dat)),
                         data_set_b=np.array((dat, dat)))

    print('\n\nTEST 4 - independent within unbalanced')
    comp_settings['stats_type'] = 'independent'
    comp = NetworkComparison()
    comp.compare_within(comp_settings, res_0, res_1, dat, dat)

    print('\n\nTEST 5 - independent between unbalanced')
    comp_settings['stats_type'] = 'independent'
    comp = NetworkComparison()
    comp.compare_between(comp_settings,
                         network_set_a=np.array((res_0, res_1)),
                         network_set_b=np.array((res_2, res_3, res_4)),
                         data_set_a=np.array((dat, dat)),
                         data_set_b=np.array((dat, dat, dat)))
Example #23
def test_multivariate_te_corr_gaussian(estimator=None):
    """Test multivariate TE estimation on correlated Gaussians.

    Run the multivariate TE algorithm on two sets of random Gaussian data with
    a given covariance. The second data set is shifted by one sample creating
    a source-target delay of one sample. This example is modeled after the
    JIDT demo 4 for transfer entropy. The resulting TE can be compared to the
    analytical result (but expect some error in the estimate).

    The simulated delay is 1 sample, i.e., the algorithm should find
    significant TE from sample (0, 1), a sample in process 0 with lag/delay 1.
    The final target sample should always be (1, 1), the mandatory sample at
    lag 1, because there is no memory in the process.

    Note:
        This test runs considerably faster than other system tests.
        This produces strange small values for non-coupled sources.  TODO
    """
    if estimator is None:
        estimator = 'jidt_kraskov'

    n = 1000
    cov = 0.4
    source_1 = [rn.normalvariate(0, 1) for r in range(n)]  # correlated src
    # source_2 = [rn.normalvariate(0, 1) for r in range(n)]  # uncorrelated src
    target = [
        sum(pair)
        for pair in zip([cov * y for y in source_1],
                        [(1 - cov) * y
                         for y in [rn.normalvariate(0, 1) for r in range(n)]])
    ]
    # Cast everything to numpy so the idtxl estimator understands it.
    source_1 = np.expand_dims(np.array(source_1), axis=1)
    # source_2 = np.expand_dims(np.array(source_2), axis=1)
    target = np.expand_dims(np.array(target), axis=1)

    dat = Data(normalise=True)
    dat.set_data(np.vstack((source_1[1:].T, target[:-1].T)), 'ps')
    analysis_opts = {
        'cmi_calc_name': estimator,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21,
    }
    random_analysis = Multivariate_te(max_lag_sources=5,
                                      min_lag_sources=1,
                                      max_lag_target=5,
                                      options=analysis_opts)
    # res = random_analysis.analyse_network(dat)  # full network
    # utils.print_dict(res)
    res_1 = random_analysis.analyse_single_target(dat, 1)  # coupled direction
    # Assert that there are significant conditionals from the source for
    # target 1. For 500 repetitions I got mean errors of 0.02097686 and
    # 0.01454073 for examples 1 and 2, respectively. The maximum errors were
    # 0.093841 and 0.05833172, respectively. This inspired the following
    # error boundaries.
    expected_res = np.log(1 / (1 - np.power(cov, 2)))
    diff = np.abs(max(res_1['cond_sources_te']) - expected_res)
    print('Expected source sample: (0, 1)\nExpected target sample: (1, 1)')
    print(('Estimated TE: {0:5.4f}, analytical result: {1:5.4f}, error: '
           '{2:2.2f} %').format(max(res_1['cond_sources_te']), expected_res,
                                100 * diff / expected_res))
    assert (diff < 0.1), ('Multivariate TE calculation for correlated '
                          'Gaussians failed (error larger than 0.1: {0}, '
                          'expected: {1}, actual: {2}).'.format(
                              diff, expected_res, res_1['cond_sources_te']))
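For reference, the analytical value the test compares against can be sanity-checked numerically. The sketch below is standalone rather than part of the original test, and it assumes the textbook result that two jointly Gaussian variables with correlation coefficient rho share I(X;Y) = -0.5 ln(1 - rho^2) nats; for this memoryless, delay-1 coupling, the transfer entropy reduces to that mutual information. The test's expected_res follows the convention of the cited JIDT demo instead; the two values agree only approximately, which is consistent with the loose 0.1 error bound asserted above.

import numpy as np

# Standalone sanity check under the assumptions stated above.
rng = np.random.default_rng(0)
n, cov = 100000, 0.4
src = rng.normal(0, 1, n)
tgt = cov * src + (1 - cov) * rng.normal(0, 1, n)
rho = np.corrcoef(src, tgt)[0, 1]      # realised correlation coefficient
mi_nats = -0.5 * np.log(1 - rho ** 2)  # Gaussian MI in nats
print('rho = {0:.4f}, analytical MI = {1:.4f} nats'.format(rho, mi_nats))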
# Import classes
import numpy as np
from idtxl.partial_information_decomposition import (
    PartialInformationDecomposition)
from idtxl.data import Data

# a) Generate test data
n = 100
alph = 2
x = np.random.randint(0, alph, n)
y = np.random.randint(0, alph, n)
z = np.logical_xor(x, y).astype(int)
data = Data(np.vstack((x, y, z)), 'ps', normalise=False)

# b) Initialise analysis object and define settings for both PID estimators
pid = PartialInformationDecomposition()
settings_tartu = {'pid_estimator': 'TartuPID', 'lags_pid': [0, 0]}
settings_sydney = {
    'alph_s1': alph,
    'alph_s2': alph,
    'alph_t': alph,
    'max_unsuc_swaps_row_parm': 60,
    'num_reps': 63,
    'max_iters': 1000,
    'pid_estimator': 'SydneyPID',
    'lags_pid': [0, 0]
}

# c) Run Tartu estimator
results_tartu = pid.analyse_single_target(settings=settings_tartu,
                                          data=data,
                                          target=2,
                                          sources=[0, 1])
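The snippet is truncated at this point. A plausible continuation, reconstructed rather than recovered from the original, runs the Sydney estimator configured in settings_sydney on the same target and prints both result objects. Since XOR is the canonical purely synergistic relationship, both estimators should assign roughly one bit to the synergy atom and near-zero information to the unique and shared atoms.

# d) Run Sydney estimator (reconstructed continuation, mirroring the Tartu
# call above; not recovered from the original snippet)
results_sydney = pid.analyse_single_target(settings=settings_sydney,
                                           data=data,
                                           target=2,
                                           sources=[0, 1])

# e) Inspect the estimated PID atoms for target process 2
print(results_tartu.get_single_target(2))
print(results_sydney.get_single_target(2))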
def test_multivariate_mi_init():
    """Test instance creation for MultivariateMI class."""
    # Test error on missing estimator
    settings = {
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'max_lag_sources': 7,
        'min_lag_sources': 2
    }
    nw = MultivariateMI()
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=Data(), target=1)

    # Test setting of min and max lags
    settings['cmi_estimator'] = 'JidtKraskovCMI'
    data = Data()
    data.generate_mute_data(n_samples=10, n_replications=5)

    # Invalid: min lag sources bigger than max lag
    settings['min_lag_sources'] = 8
    settings['max_lag_sources'] = 7
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)

    # Invalid: tau bigger than lags
    settings['min_lag_sources'] = 2
    settings['max_lag_sources'] = 4
    settings['tau_sources'] = 10
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)

    # Invalid: negative lags or taus
    settings['tau_sources'] = 1
    settings['min_lag_sources'] = 1
    settings['max_lag_sources'] = -7
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)
    settings['max_lag_sources'] = 7
    settings['min_lag_sources'] = -4
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)
    settings['min_lag_sources'] = 4
    settings['tau_sources'] = -1
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)

    # Invalid: lags or taus are no integers
    settings['min_lag_sources'] = 1.5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)
    settings['min_lag_sources'] = 1
    settings['max_lag_sources'] = 1.5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)
    settings['max_lag_sources'] = 7
    settings['tau_sources'] = 1.5
    with pytest.raises(RuntimeError):
        nw.analyse_single_target(settings=settings, data=data, target=1)

    # Invalid: sources or target is no int
    with pytest.raises(RuntimeError):  # no int
        nw.analyse_single_target(settings=settings, data=data, target=1.5)
    with pytest.raises(RuntimeError):  # negative
        nw.analyse_single_target(settings=settings, data=data, target=-1)
    with pytest.raises(RuntimeError):  # not in data
        nw.analyse_single_target(settings=settings, data=data, target=10)
    with pytest.raises(RuntimeError):  # wrong type
        nw.analyse_single_target(settings=settings, data=data, target={})
    with pytest.raises(RuntimeError):  # negative
        nw.analyse_single_target(settings=settings,
                                 data=data,
                                 target=0,
                                 sources=-1)
    with pytest.raises(RuntimeError):  # negative
        nw.analyse_single_target(settings=settings,
                                 data=data,
                                 target=0,
                                 sources=[-1])
    with pytest.raises(RuntimeError):  # not in data
        nw.analyse_single_target(settings=settings,
                                 data=data,
                                 target=0,
                                 sources=20)
    with pytest.raises(RuntimeError):  # not in data
        nw.analyse_single_target(settings=settings,
                                 data=data,
                                 target=0,
                                 sources=[20])
Exemplo n.º 26
0
def test_ais_fdr():
    settings = {'n_perm_max_seq': 1000, 'n_perm_mi': 1000}
    process_0 = {
        'selected_vars': [(0, 1), (0, 2), (0, 3)],
        'ais_pval': 0.0001,
        'ais_sign': True
    }
    process_1 = {
        'selected_vars': [(1, 0), (1, 1), (1, 2)],
        'ais_pval': 0.031,
        'ais_sign': True
    }
    process_2 = {'selected_vars': [], 'ais_pval': 0.41, 'ais_sign': False}
    res_1 = ResultsSingleProcessAnalysis(n_nodes=3,
                                         n_realisations=1000,
                                         normalised=True)
    res_1._add_single_result(process=0, settings=settings, results=process_0)
    res_1._add_single_result(process=1, settings=settings, results=process_1)
    res_2 = ResultsSingleProcessAnalysis(n_nodes=3,
                                         n_realisations=1000,
                                         normalised=True)
    res_2._add_single_result(process=2, settings=settings, results=process_2)

    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'alpha_fdr': 0.05,
        'max_lag': 3
    }
    data = Data()
    data.generate_mute_data(n_samples=100, n_replications=3)
    analysis_setup = ActiveInformationStorage()
    analysis_setup._initialise(settings=settings, data=data, process=1)
    res_pruned = stats.ais_fdr(settings, res_1, res_2)
    assert (not res_pruned._single_process[2].selected_vars_sources), (
        'Process 2 has not been pruned from results.')

    alpha_fdr = res_pruned.settings.alpha_fdr
    for k in res_pruned.processes_analysed:
        if not res_pruned._single_process[k]['ais_sign']:
            assert (res_pruned._single_process[k]['ais_pval'] >
                    alpha_fdr), ('P-value of non-sign. AIS is not larger '
                                 'than alpha_fdr.')
            assert (not res_pruned._single_process[k]['selected_vars']), (
                'List of significant past variables is not empty.')
        else:
            assert (res_pruned._single_process[k]['ais_pval'] <
                    1), ('P-value of sign. AIS is not smaller than 1.')
            assert (res_pruned._single_process[k]['selected_vars']), (
                'List of significant past variables is empty.')

    # Test function call for single result
    res_pruned = stats.ais_fdr(settings, res_1)
    print('successful call on single result dict.')

    # Test None result for insufficient no. permutations, no FDR-corrected
    # results (the results class throws an error if no FDR-corrected results
    # exist).
    res_1.settings['n_perm_mi'] = 2
    res_2.settings['n_perm_mi'] = 2
    res_pruned = stats.ais_fdr(settings, res_1, res_2)
    with pytest.raises(RuntimeError):
        res_pruned.get_significant_processes(fdr=True)
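For intuition about the rule that stats.ais_fdr exercises: Benjamini-Hochberg-style FDR correction sorts the m p-values and keeps all hypotheses up to the largest rank i with p_(i) <= i * alpha / m. Below is a minimal standalone sketch of that rule, not IDTxl's internal implementation.

import numpy as np

def fdr_keep(pvals, alpha=0.05):
    """Return a boolean mask of p-values surviving Benjamini-Hochberg FDR."""
    pvals = np.asarray(pvals, dtype=float)
    m = pvals.size
    order = np.argsort(pvals)
    thresholds = alpha * np.arange(1, m + 1) / m
    passed = pvals[order] <= thresholds
    keep = np.zeros(m, dtype=bool)
    if passed.any():
        k = np.flatnonzero(passed).max()  # largest rank passing its threshold
        keep[order[:k + 1]] = True
    return keep

# The two significant AIS p-values from the test above survive, the third
# does not: [ True  True False]
print(fdr_keep([0.0001, 0.031, 0.41]))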
Exemplo n.º 27
0
import os
import time
import numpy as np
from idtxl.multivariate_te import Multivariate_te
from idtxl.data import Data

start_time = time.time()
# load simulated data from 2 coupled Lorenz systems 1->2, u = 45 ms
d = np.load(os.path.join(os.path.dirname(__file__),
            'data/lorenz_2_exampledata.npy'))
dat = Data()
dat.set_data(d[:, :, 0:100], 'psr')
analysis_opts = {
        'cmi_calc_name': 'jidt_kraskov',
        'n_perm_max_stat': 200,
        'n_perm_min_stat': 200,
        'n_perm_omnibus': 500,
        'n_perm_max_seq': 500,
        }
lorenz_analysis = Multivariate_te(max_lag_sources=50, min_lag_sources=40,
                                  max_lag_target=30, tau_sources=1,
                                  tau_target=3, options=analysis_opts)
res_1 = lorenz_analysis.analyse_single_target(dat, 0)
res_2 = lorenz_analysis.analyse_single_target(dat, 1)
runtime = time.time() - start_time
print("---- {0} minutes".format(runtime / 60))

path = os.path.join(os.path.dirname(__file__), 'output/')
np.savez(path + 'test_lorenz', res_1, res_2)
np.save(path + 'test_lorenz_time', runtime)
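A note on the lag settings above: the source lag window of 40 to 50 samples is chosen to bracket the stated coupling delay of u = 45 ms. The conversion below makes the assumed sampling step explicit; the step is an assumption, since the example data file does not state it here.

dt_ms = 1.0                       # assumed simulation step in ms (hypothetical)
delay_samples = 45.0 / dt_ms      # coupling delay u = 45 ms in samples
assert 40 <= delay_samples <= 50  # falls inside min/max_lag_sources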
Exemplo n.º 28
0
def test_return_local_values():
    """Test estimation of local values."""
    max_lag = 5
    data = Data(seed=SEED)
    data.generate_mute_data(200, 5)
    settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'local_values': True,  # request calculation of local values
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_max_seq': 21,
        'n_perm_omnibus': 21,
        'max_lag_sources': max_lag,
        'min_lag_sources': max_lag,
        'max_lag_target': max_lag
    }
    target = 1
    mi = BivariateMI()
    results_local = mi.analyse_network(settings, data, targets=[target])

    lmi = results_local.get_single_target(target, fdr=False)['mi']
    if lmi is None:
        return
    n_sources = len(results_local.get_target_sources(target, fdr=False))
    assert type(lmi) is np.ndarray, (
        'LMI estimation did not return an array of values: {0}'.format(lmi))
    assert lmi.shape[0] == n_sources, (
        'Wrong dim (no. sources) in LMI estimate: {0}'.format(lmi.shape))
    assert lmi.shape[1] == data.n_realisations_samples(
        (0, max_lag)), ('Wrong dim (no. samples) in LMI estimate {0}'.format(
            lmi.shape))
    assert lmi.shape[2] == data.n_replications, (
        'Wrong dim (no. replications) in LMI estimate {0}'.format(lmi.shape))

    # Test for correctness of single link MI estimation by comparing it to the
    # MI between single variables and the target. For this test case where we
    # find only one significant past variable per source, the two should be the
    # same. Also compare single link average MI to mean local MI for each
    # link.
    settings['local_values'] = False
    results_avg = mi.analyse_network(settings, data, targets=[target])
    mi_single_link = results_avg.get_single_target(target, fdr=False)['mi']
    mi_selected_sources = results_avg.get_single_target(
        target, fdr=False)['selected_sources_mi']
    sources_local = results_local.get_target_sources(target, fdr=False)
    sources_avg = results_avg.get_target_sources(target, fdr=False)
    print('Single link average MI: {0}, single source MI: {1}.'.format(
        mi_single_link, mi_selected_sources))
    if mi_single_link is None:
        return
    assert np.isclose(mi_single_link, mi_selected_sources, atol=0.005).all(), (
        'Single link average MI {0} and single source MI {1} deviate.'.format(
            mi_single_link, mi_selected_sources))
    # Check if average and local values are the same. Test each source
    # separately. Inferred sources may differ between the two calls to
    # analyse_network() due to low number of surrogates used in unit testing.
    print('Compare average and local values.')
    for s in list(set(sources_avg).intersection(sources_local)):
        i1 = np.where(sources_avg == s)[0][0]
        i2 = np.where(sources_local == s)[0][0]
        assert np.isclose(
            mi_single_link[i1], np.mean(lmi[i2, :, :]), atol=0.005
        ), ('Single link average MI {0:0.6f} and mean LMI {1:0.6f} deviate.'.
            format(mi_single_link[i1], np.mean(lmi[i2, :, :])))
        assert np.isclose(
            mi_single_link[i1], mi_selected_sources[i1], atol=0.005
        ), ('Single link average MI {0:0.6f} and single source MI {1:0.6f} deviate.'
            .format(mi_single_link[i1], mi_selected_sources[i1]))
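The invariant asserted above, namely that the link-average MI should equal the mean of the local MI values over samples and replications, can be illustrated with a toy array of the assumed shape (sources x samples x replications). This is a standalone sketch, not IDTxl output.

import numpy as np

rng = np.random.default_rng(1)
# Toy local values: 1 source, 195 samples, 5 replications (assumed shape).
lmi_demo = rng.normal(loc=0.2, scale=0.05, size=(1, 195, 5))
avg_demo = lmi_demo.mean(axis=(1, 2))  # average over samples and replications
print(avg_demo)  # close to 0.2, the assumed link-average value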
Exemplo n.º 29
0
import os
import time
import numpy as np
from idtxl.multivariate_te import MultivariateTE
from idtxl.data import Data

start_time = time.time()
dat = Data()  # initialise an empty data object
dat.generate_mute_data(n_samples=1000, n_replications=10)
settings = {
    'cmi_estimator': 'JidtKraskovCMI',
    'n_perm_max_stat': 500,
    'n_perm_min_stat': 200,
    'n_perm_omnibus': 500,
    'n_perm_max_seq': 500,
    'max_lag_sources': 5,
    'min_lag_sources': 1
}

network_analysis = MultivariateTE()
res = network_analysis.analyse_network(settings, dat)
runtime = time.time() - start_time
print("---- {0} minutes".format(runtime / 60))

path = os.path.join(os.path.dirname(__file__), 'output/')
np.save(path + 'test', res)
np.save(path + 'test_time', runtime)
Exemplo n.º 30
0
"""Unit tests for IDTxl I/O functions."""
import os
import pickle
import pytest
import numpy as np
from pkg_resources import resource_filename
from idtxl import idtxl_io as io
from idtxl.data import Data
from idtxl.network_comparison import NetworkComparison

# Generate data and load network inference results.
n_nodes = 5
data_0 = Data()
data_0.generate_mute_data(500, 5)
data_1 = Data(np.random.rand(n_nodes, 500, 5), 'psr')

path = os.path.join(os.path.dirname(__file__), 'data/')
res_0 = pickle.load(open(path + 'mute_results_0.p', 'rb'))
res_1 = pickle.load(open(path + 'mute_results_1.p', 'rb'))

# Generate network comparison results.
comp_settings = {
    'cmi_estimator': 'JidtKraskovCMI',
    'stats_type': 'independent',
    'n_perm_max_stat': 50,
    'n_perm_min_stat': 50,
    'n_perm_omnibus': 200,
    'n_perm_max_seq': 50,
    'alpha_comp': 0.26,
    'n_perm_comp': 200,
    'tail': 'two',