def gen_data_frame(links_coeffs, time, seed_val):
    """
    Build a seeded sample data set together with its parent graph.

    Seeds NumPy's global random state with ``seed_val``, calls
    ``pp.var_process(links_coeffs, T=time)`` and returns the generated series
    wrapped in a ``pp.DataFrame`` alongside the result of
    ``_get_parent_graph(links_coeffs)``.
    """
    # Seed before drawing so that repeated calls give the same series
    np.random.seed(seed_val)
    # Only the first return value (the series) is used here
    series, _unused = pp.var_process(links_coeffs, T=time)
    # Parent structure computed from the same link coefficients
    parent_graph = _get_parent_graph(links_coeffs)
    frame = pp.DataFrame(series)
    return frame, parent_graph
示例#2
0
def gen_data_frame(links_coeffs, time, seed_val):
    """
    Return ``(dataframe, true_parents)`` for the given link coefficients.

    The global NumPy random state is seeded with ``seed_val`` before
    ``pp.var_process`` is called with ``T=time``; the resulting series is
    wrapped in ``pp.DataFrame`` and paired with
    ``_get_parent_graph(links_coeffs)``.
    """
    np.random.seed(seed_val)  # reproducible draw
    generated = pp.var_process(links_coeffs, T=time)
    # var_process hands back a pair; the second element is not needed
    raw_data, _ = generated
    parents = _get_parent_graph(links_coeffs)
    return pp.DataFrame(raw_data), parents
示例#3
0
def a_sample(request):
    """
    Produce one seeded sample from the parameters carried by ``request``.

    ``request.param`` is unpacked into ``(links_coeffs, time, seed_val)``.
    The global NumPy random state is seeded, ``pp.var_process`` is called
    with ``T=time`` and the series is returned as a ``pp.DataFrame`` together
    with ``_get_parent_graph(links_coeffs)``.

    NOTE(review): presumably a parametrized pytest fixture; the decorator is
    not visible here, so confirm against the full file.
    """
    coeffs, n_steps, seed = request.param
    # Seed before drawing so the sample is reproducible
    np.random.seed(seed)
    # The second return value of var_process is not used
    series, _unused = pp.var_process(coeffs, T=n_steps)
    parent_graph = _get_parent_graph(coeffs)
    frame = pp.DataFrame(series)
    return frame, parent_graph
def a_sample(request):
    """
    Return ``(dataframe, true_parents)`` for the parameter set in ``request``.

    Expects ``request.param`` to unpack into exactly three values: the link
    coefficients, the ``T`` argument for ``pp.var_process`` and the random
    seed.
    """
    links, length, seed = request.param
    np.random.seed(seed)  # fix the global random state first
    generated = pp.var_process(links, T=length)
    # Keep the series, drop the second element of the returned pair
    raw_data, _ = generated
    parents = _get_parent_graph(links)
    return pp.DataFrame(raw_data), parents
示例#5
0
def gen_process(a_process):
    """
    Run ``pp.var_process`` for one process fixture.

    ``a_process`` must unpack into four values; the first is ignored, the
    others are the initial values, the coefficients and the expected result.
    The ``T`` argument is taken from the first dimension of the expected
    result and the process is generated with ``use="no_noise"``.

    Returns the two values produced by ``pp.var_process`` as a tuple.
    """
    _ignored, start_values, coefficients, expected = a_process
    # Deduce the series length from the shape of the expected answer
    n_steps = expected.shape[0]
    var_kwargs = {
        "T": n_steps,
        "initial_values": start_values,
        "use": "no_noise",
    }
    series, parents_neighbors = pp.var_process(coefficients, **var_kwargs)
    return series, parents_neighbors
示例#6
0
def gen_process(a_process):
    """
    Generate data for a process fixture via ``pp.var_process``.

    The fixture is a 4-tuple whose last three entries are the initial values,
    the coefficients and the expected answer. ``T`` is set to
    ``expect.shape[0]`` and ``use`` to ``"no_noise"``.
    """
    # First entry of the fixture is not needed here
    _, initial, coeffs, answer = a_process
    # The expected answer fixes how many time steps to generate
    length = answer.shape[0]
    result = pp.var_process(coeffs,
                            T=length,
                            initial_values=initial,
                            use="no_noise")
    # Unpack explicitly so the caller always receives a 2-tuple
    generated, neighbors = result
    return generated, neighbors
示例#7
0
def test_noise_generation(covariance_parameters):
    """
    Check that generated noise reproduces the requested covariance matrix.

    ``covariance_parameters`` unpacks into the process parameters and the
    covariance matrix they imply. A sample is drawn with ``use='inno_cov'``
    and its empirical covariance is compared with the expected matrix using
    ``rtol=1e-1`` and ``atol=0.025``.
    """
    params, expected_cov = covariance_parameters
    # Draw a long sample from the process for this parameter set
    sample, _ = pp.var_process(params,
                               T=10000,
                               use='inno_cov',
                               verbosity=0,
                               initial_values=None)
    # np.cov treats rows as variables, hence the transpose
    observed_cov = np.cov(sample.T)
    failure_text = ("Covariance of data does not match covariance implied by "
                    " parameter set")
    tolerances = {"rtol": 1e-1, "atol": 0.025}
    np.testing.assert_allclose(expected_cov, observed_cov,
                               verbose=True, err_msg=failure_text,
                               **tolerances)
    def setUp(self):
        """
        Generate a seeded three-variable chain process for the tests.

        Stores the generated series on ``self.data``, the second return value
        of ``pp.var_process`` on ``self.true_parents_coeffs`` and the result
        of ``_get_parent_graph`` on ``self.true_parents``.
        """
        numpy.random.seed(42)
        self_coupling, cross_coupling = .5, 0.6
        n_steps = 1000

        # True graph: every variable depends on its own lag-1 value, and
        # variables 1 and 2 additionally on the lag-1 value of the previous
        # variable
        links_coeffs = {0: [((0, -1), self_coupling)]}
        for var in (1, 2):
            links_coeffs[var] = [((var, -1), self_coupling),
                                 ((var - 1, -1), cross_coupling)]

        generated = pp.var_process(links_coeffs, T=n_steps)
        self.data, self.true_parents_coeffs = generated
        # Unpacking raises unless the generated data is two-dimensional
        n_time, n_vars = self.data.shape

        self.true_parents = _get_parent_graph(self.true_parents_coeffs)
    def setUp(self):
        """
        Generate seeded test data and build the independence-test objects.

        Stores the generated series on ``self.data``, the second return value
        of ``pp.var_process`` on ``self.true_parents_coeffs``, a ``ParCorr``
        instance on ``self.ci_par_corr`` and a ``GPDC`` instance on
        ``self.ci_gpdc``.
        """
        numpy.random.seed(42)
        self_coupling = 0.6
        cross_coupling = 0.6
        n_steps = 1000

        # True graph: every variable depends on its own lag-1 value, and
        # variables 1 and 2 additionally on the lag-1 value of the previous
        # variable
        links_coeffs = {0: [((0, -1), self_coupling)]}
        for var in (1, 2):
            links_coeffs[var] = [((var, -1), self_coupling),
                                 ((var - 1, -1), cross_coupling)]

        generated = pp.var_process(links_coeffs, T=n_steps)
        self.data, self.true_parents_coeffs = generated
        # Unpacking raises unless the generated data is two-dimensional
        n_time, n_vars = self.data.shape

        par_corr_options = {
            'use_mask': False,
            'mask_type': None,
            'significance': 'analytic',
            'fixed_thres': None,
            'sig_samples': 10000,
            'sig_blocklength': 3,
            'confidence': 'analytic',
            'conf_lev': 0.9,
            'conf_samples': 10000,
            'conf_blocklength': 1,
            'recycle_residuals': False,
            'verbosity': 0,
        }
        self.ci_par_corr = ParCorr(**par_corr_options)

        gpdc_options = {
            'significance': 'analytic',
            'sig_samples': 1000,
            'sig_blocklength': 1,
            'confidence': 'bootstrap',
            'conf_lev': 0.9,
            'conf_samples': 100,
            'conf_blocklength': None,
            'use_mask': False,
            'mask_type': 'y',
            'recycle_residuals': False,
            'verbosity': 0,
        }
        self.ci_gpdc = GPDC(**gpdc_options)
示例#10
0
def test_noise_generation(covariance_parameters):
    """
    Verify that the sample covariance matches the covariance of the parameters.

    Draws 10000 steps from ``pp.var_process`` with ``use='inno_cov'`` and
    asserts, via ``np.testing.assert_allclose``, that the covariance of the
    generated data is close to the matrix supplied by the fixture.
    """
    good_params, covar_matrix = covariance_parameters
    process_options = {
        'T': 10000,
        'use': 'inno_cov',
        'verbosity': 0,
        'initial_values': None,
    }
    generated, _ = pp.var_process(good_params, **process_options)
    # Transpose so that each row passed to np.cov is one variable
    covar_result = np.cov(generated.T)
    message = ("Covariance of data does not match covariance implied by "
               " parameter set")
    np.testing.assert_allclose(
        covar_matrix, covar_result,
        rtol=1e-1, atol=0.025, verbose=True, err_msg=message)
示例#11
0
                max_conds_px=max_conds_px,
            )

    return j, results_in_j


# Example data; a real dataset can be loaded here instead as a numpy array of
# shape (T, N)
numpy.random.seed(42)     # Fix random seed
# Each key is a variable; its value lists the incoming links in the format
# [((driver, lag), coeff), ...]
links_coeffs = {0: [((0, -1), 0.7)],
                1: [((1, -1), 0.8), ((0, -1), 0.8)],
                2: [((2, -1), 0.5), ((1, -2), 0.5)],
                }

T = 500     # time series length
data, true_parents_neighbors = pp.var_process(links_coeffs, T=T)
T, N = data.shape

# Initialize dataframe object
dataframe = pp.DataFrame(data)

# Optionally specify variable names. They are derived from N so that there is
# exactly one label per data column (a hard-coded list can drift out of sync
# with the number of variables in links_coeffs).
var_names = [r'$X^%d$' % i for i in range(N)]

# Significance level in condition-selection step. If a list of levels is
# provided or pc_alpha=None, the optimal pc_alpha is automatically chosen via
# model-selection.
pc_alpha = 0.2  # [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
selected_variables = range(N)  # alternatively a subset such as [2]

# Maximum time lag
示例#12
0
 def run(self):
     """
     Generate a series from ``self.links_coeffs`` and run PCMCI on it.

     The ``PCMCI`` object (built with a default ``ParCorr`` test) is stored
     on ``self.pcmciobj`` and the return value of
     ``run_pcmci(tau_max=2, pc_alpha=None)`` on ``self.results``.
     """
     # Only the generated series is needed from var_process
     series, _ = pp.var_process(self.links_coeffs, T=1000)
     self.pcmciobj = PCMCI(dataframe=pp.DataFrame(series),
                           cond_ind_test=ParCorr())
     pcmci_options = {'tau_max': 2, 'pc_alpha': None}
     self.results = self.pcmciobj.run_pcmci(**pcmci_options)
    def plot__gpdc_get_single_residuals(
            self, save_path='/home/jakobrunge/test/gpdctest.pdf'):
        """
        Plot single-residual predictions of the GPDC and ParCorr tests.

        Builds a synthetic 3 x T array of standard-normal noise in which row 1
        additionally receives ``c * func(row 2)``, asks ``self.ci_gpdc`` and
        ``self.ci_par_corr`` for their ``_get_single_residuals`` predictions
        of row 1, and saves a scatter plot comparing both predictions with
        the true dependence curve.

        Parameters
        ----------
        save_path : str, optional
            File the figure is written to. Defaults to the location the
            method originally hard-coded, so existing callers are unaffected.
        """
        # Swap in self.ci_par_corr here to plot that test as the main one
        ci_test = self.ci_gpdc

        a = 0.
        c = .3
        T = 500
        # Each key refers to a variable and the incoming links are supplied as a
        # list of format [((driver, lag), coeff), ...]
        links_coeffs = {
            0: [((0, -1), a)],
            1: [((1, -1), a), ((0, -1), c)],
        }

        numpy.random.seed(42)
        # The test object is given a dataframe before its residual routine is
        # called; only the generated series is needed from var_process
        data, _ = pp.var_process(links_coeffs,
                                 use='inv_inno_cov',
                                 T=T)
        dataframe = pp.DataFrame(data)
        ci_test.set_dataframe(dataframe)

        def func(x):
            # Nonlinear dependence used to build (and later plot) the signal
            return x * (1. - 4. * x**0 * numpy.exp(-x**2 / 2.))

        true_residual = numpy.random.randn(3, T)
        array = numpy.copy(true_residual)
        # Row 1 becomes noise plus a nonlinear function of row 2
        array[1] += c * func(array[2])
        # Label rows 0 and 1 with 0 and 1, and every remaining row with 2
        xyz = numpy.array([0, 1] + [2] * (array.shape[0] - 2))

        # A single pre-formatted string prints identically with the Python 2
        # print statement and the Python 3 print function
        print('xyz  %s %s' % (xyz, numpy.where(xyz == 1)))
        target_var = 1

        # Keep an untouched copy for plotting, taken before the residual
        # routines are handed the array
        array_orig = numpy.copy(array)

        from matplotlib import pyplot
        (est_residual, pred) = ci_test._get_single_residuals(array,
                                                             target_var,
                                                             standardize=False,
                                                             return_means=True)
        (resid_, pred_parcorr) = self.ci_par_corr._get_single_residuals(
            array, target_var, standardize=False, return_means=True)

        fig = pyplot.figure()
        ax = fig.add_subplot(111)
        # Data (default colour), ci_test prediction (red), ParCorr (green)
        ax.scatter(array_orig[2], array_orig[1])
        ax.scatter(array_orig[2], pred, color='red')
        ax.scatter(array_orig[2], pred_parcorr, color='green')
        # True dependence curve
        ax.plot(numpy.sort(array_orig[2]),
                c * func(numpy.sort(array_orig[2])),
                color='black')

        pyplot.savefig(save_path)
示例#14
0
                max_conds_px=max_conds_px,
            )

    return j, results_in_j


# Example data; a real dataset can be loaded here instead as a numpy array of
# shape (T, N)
numpy.random.seed(42)     # Fix random seed
# Each key is a variable; its value lists the incoming links in the format
# [((driver, lag), coeff), ...]
links_coeffs = {0: [((0, -1), 0.7)],
                1: [((1, -1), 0.8), ((0, -1), 0.8)],
                2: [((2, -1), 0.5), ((1, -2), 0.5)],
                }

T = 500     # time series length
data, true_parents_neighbors = pp.var_process(links_coeffs, T=T)
T, N = data.shape

# Optionally specify variable names. They are derived from N so that the
# dataframe receives exactly one label per data column (the process above has
# three variables, so a fixed four-entry list would not match).
var_names = [r'$X^%d$' % i for i in range(N)]

# Initialize dataframe object
dataframe = pp.DataFrame(data, var_names=var_names)

# Significance level in condition-selection step. If a list of levels is
# provided or pc_alpha=None, the optimal pc_alpha is automatically chosen via
# model-selection.
pc_alpha = 0.2  # [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
selected_variables = range(N)  # alternatively a subset such as [2]

# Maximum time lag