Exemplo n.º 1
0
    def test_fit_sample(self):
        """Fit the model and verify that sampling honors the requested size."""
        data = sample_trivariate_xyz()
        model = GaussianMultivariate()
        model.fit(data)

        # Every requested sample size must be honored exactly.
        for size in (10, 50, 100):
            assert len(model.sample(size)) == size

        sampled_data = model.sample(10)

        # Sampled frame has the same three columns as the training data.
        assert sampled_data.shape == (10, 3)
        assert all(column in sampled_data for column in data.columns)
Exemplo n.º 2
0
    def test_cdf(self):
        """Check the CDF is bounded in [0, 1] and non-decreasing per column.

        The monotonicity check fixes every column except one to a constant
        value taken from a random row, sorts along the remaining column, and
        verifies the CDF does not decrease (up to a small float tolerance).
        """
        data = sample_trivariate_xyz()
        model = GaussianMultivariate()
        model.fit(data)

        sampled_data = model.sample(10)

        # CDF values must lie within [0, 1].
        cdf = model.cumulative_distribution(sampled_data)
        assert (0 <= cdf).all() and (cdf <= 1).all()

        # CDF must be non-decreasing along each column.
        for column in sampled_data.columns:
            sorted_data = sampled_data.sort_values(column)
            other_columns = data.columns.to_list()
            other_columns.remove(column)
            row = sorted_data.sample(1).iloc[0]
            # Fix: use a distinct loop variable here; the original shadowed
            # the outer ``column``, which is error-prone on later edits.
            for other_column in other_columns:
                sorted_data[other_column] = row[other_column]

            cdf = model.cumulative_distribution(sorted_data)
            diffs = np.diff(
                cdf
            ) + 0.001  # Add tolerance to avoid floating precision issues.
            assert (diffs >= 0).all()
Exemplo n.º 3
0
    def test_fit_sample_distribution_dict(self):
        """A per-column distribution dict is accepted and sampling still works."""
        data = sample_trivariate_xyz()
        model = GaussianMultivariate(distribution={'x': GaussianKDE()})
        model.fit(data)

        assert model.sample(10).shape == (10, 3)
Exemplo n.º 4
0
def fit_copula_to_z2_data(name=None, do_plot=False):
    """ Example of fitting a copula to z2 stream data

    :param   name:     stream name (defaults to a helicopter psi/theta stream)
    :param   do_plot:  if True, scatter-plot the synthetic sample
    :return: copula  obj
    """
    name = name or 'z2~helicopter_psi~helicopter_theta~70.json'
    assert 'z2~' in name, "Expecting a bivariate stream"

    # Invert the z-curve embedding back into normalized (pitch, yaw) pairs.
    lagged_values = get_stream_lagged_values(name=name)
    normalized_points = [
        mr.norminv(mr.from_zcurve(zvalue=z, dim=2)) for z in lagged_values
    ]
    npitch, nyaw = zip(*normalized_points)

    X = np.array([npitch, nyaw]).transpose()
    copula = GaussianMultivariate()
    copula.fit(X)

    # Draw a synthetic sample of the same size as the training data.
    synthetic_points = copula.sample(len(X))
    spitch, syaw = synthetic_points[0], synthetic_points[1]
    if do_plot:
        plt.scatter(spitch, syaw)
        plt.xlabel('Simulated Pitch - normalized')
        plt.ylabel('Simulated Yaw - normalized')
        plt.show()
    return copula
Exemplo n.º 5
0
    def test_fit_sample_distribution_name(self):
        """A distribution given as a dotted import path string is accepted."""
        data = sample_trivariate_xyz()
        model = GaussianMultivariate(
            'copulas.univariate.gaussian_kde.GaussianKDE')
        model.fit(data)

        samples = model.sample(10)
        assert samples.shape == (10, 3)
Exemplo n.º 6
0
 def test_gaussiankde_arguments(self):
     """KDE constructor arguments are forwarded; samples match data (KS test)."""
     size = 1000
     # Uniform integers in [0, 9) jittered with small Gaussian noise.
     data = randint.rvs(0, 9, size=size) + norm.rvs(0, 0.1, size=size)
     dist = GaussianMultivariate(distribution=GaussianKDE(bw_method=0.01))
     dist.fit(data)
     samples = dist.sample(size).to_numpy()[0]
     # Two-sample Kolmogorov-Smirnov: fail only at the 5% significance level.
     _, p = ks_2samp(data, samples)
     assert p >= 0.05
Exemplo n.º 7
0
    def test_pdf(self):
        """The PDF evaluated on sampled points must be strictly positive."""
        data = sample_trivariate_xyz()
        model = GaussianMultivariate()
        model.fit(data)

        samples = model.sample(10)

        pdf = model.probability_density(samples)
        assert (pdf > 0).all()
Exemplo n.º 8
0
    def test_fit_sample_distribution_dict_multiple(self):
        """Mixing several univariate distribution types works end to end."""
        data = sample_trivariate_xyz()
        distributions = {
            'x': Univariate(parametric=ParametricType.PARAMETRIC),
            'y': BetaUnivariate(),
            'z': GaussianKDE(),
        }
        model = GaussianMultivariate(distribution=distributions)
        model.fit(data)

        assert model.sample(10).shape == (10, 3)
Exemplo n.º 9
0
def test_conditional_sampling():
    """Conditioning on one or more columns shifts every sampled mean with it."""
    condition = np.random.randint(1, 4, size=3000)
    conditioned = np.random.normal(loc=1, scale=1, size=3000) * condition
    data = pd.DataFrame({
        'a': condition,
        'b': condition,
        'c': conditioned,
    })

    gm = GaussianMultivariate()
    gm.fit(data)

    # Conditioning on b == 1 should pull the correlated columns towards 1.
    sampled = gm.sample(3000, conditions={'b': 1})
    for name in ('a', 'b', 'c'):
        np.testing.assert_allclose(sampled[name].mean(), 1, atol=.5)

    # Conditioning on a == b == 3 should pull everything towards 3.
    sampled = gm.sample(3000, conditions={'a': 3, 'b': 3})
    for name in ('a', 'b', 'c'):
        np.testing.assert_allclose(sampled[name].mean(), 3, atol=.5)
Exemplo n.º 10
0
    def _gaussian(self, dataset):
        """
        For the given dataset, this runs "everything but the kitchen sink" (i.e.
        every feature of GaussianMultivariate that is officially supported) and
        makes sure it doesn't crash.

        Covered: fit, sample, probability_density, cumulative_distribution,
        to_dict/from_dict round-trip and save/load round-trip, each checked
        against the original model's PDF/CDF values.
        """
        model = GaussianMultivariate({
            dataset.columns[0]: GaussianKDE()  # Use a KDE for the first column
        })
        model.fit(dataset)
        # Sampling must honor every requested size.
        for N in [10, 100, 50]:
            assert len(model.sample(N)) == N
        sampled_data = model.sample(10)
        # Reference PDF/CDF values to compare the restored models against.
        pdf = model.probability_density(sampled_data)
        cdf = model.cumulative_distribution(sampled_data)

        # Test Save/Load from Dictionary
        config = model.to_dict()
        model2 = GaussianMultivariate.from_dict(config)

        for N in [10, 100, 50]:
            assert len(model2.sample(N)) == N
        # The dict-restored model must reproduce the original PDF/CDF values.
        pdf2 = model2.probability_density(sampled_data)
        cdf2 = model2.cumulative_distribution(sampled_data)
        assert np.all(np.isclose(pdf, pdf2, atol=0.01))
        assert np.all(np.isclose(cdf, cdf2, atol=0.01))

        # Test Save/Load from a file on disk.
        path_to_model = os.path.join(self.test_dir.name, "model.pkl")
        model.save(path_to_model)
        model2 = GaussianMultivariate.load(path_to_model)
        for N in [10, 100, 50]:
            assert len(model2.sample(N)) == N
        pdf2 = model2.probability_density(sampled_data)
        cdf2 = model2.cumulative_distribution(sampled_data)
        assert np.all(np.isclose(pdf, pdf2, atol=0.01))
        assert np.all(np.isclose(cdf, cdf2, atol=0.01))
Exemplo n.º 11
0
def fit_and_sample(lagged_zvalues:[[float]],num:int, copula=None):
    """ Example of fitting a copula function, and sampling
           lagged_zvalues: [ [z1,z2,z3] ]  distributed N(0,1) margins, roughly
           copula : Something from https://pypi.org/project/copulas/
           returns: [ [z1, z2, z3] ]  representative sample
    """
    # Remark: It's lazy to just sample synthetic data; some more evenly
    # spaced sampling would be preferable.
    # See https://www.microprediction.com/blog/lottery for discussion
    frame = pd.DataFrame(data=lagged_zvalues)
    if copula is None:
        copula = GaussianMultivariate()
    copula.fit(frame)
    return copula.sample(num).values.tolist()
Exemplo n.º 12
0
    def test_to_dict_from_dict(self):
        """A model rebuilt via to_dict/from_dict yields the same PDF and CDF."""
        data = sample_trivariate_xyz()
        model = GaussianMultivariate()
        model.fit(data)

        sampled_data = model.sample(10)

        # Round-trip the fitted parameters through a plain dict.
        restored = GaussianMultivariate.from_dict(model.to_dict())

        assert np.allclose(model.probability_density(sampled_data),
                           restored.probability_density(sampled_data),
                           atol=0.01)
        assert np.allclose(model.cumulative_distribution(sampled_data),
                           restored.cumulative_distribution(sampled_data),
                           atol=0.01)
Exemplo n.º 13
0
    def test_save_load(self):
        """A model persisted to disk and loaded back yields the same PDF/CDF."""
        data = sample_trivariate_xyz()
        model = GaussianMultivariate()
        model.fit(data)

        sampled_data = model.sample(10)

        # Round-trip the model through a file on disk.
        path_to_model = os.path.join(self.test_dir.name, "model.pkl")
        model.save(path_to_model)
        restored = GaussianMultivariate.load(path_to_model)

        assert np.allclose(model.probability_density(sampled_data),
                           restored.probability_density(sampled_data),
                           atol=0.01)
        assert np.allclose(model.cumulative_distribution(sampled_data),
                           restored.cumulative_distribution(sampled_data),
                           atol=0.01)
Exemplo n.º 14
0
def get_errors_sample(errors, copula="Gumbel"):
    '''
    Fit a copula to a bivariate error sample and draw a synthetic sample.

    Parameters
    ----------
    errors : numpy array of shape (n,2)
        The errors to fit with a copula
    copula : string
        "Gumbel": fit with a Gumbel copula
        "Normal": fit with a Normal copula

    Returns
    -------
    synthetic : numpy array of shape (n, 2)
        A new sample of errors obtained with the JPD of the errors, mapped
        back to the original scale.

    Raises
    ------
    ValueError
        If ``copula`` is not "Gumbel" or "Normal".
    '''
    ### Select the copula type up front. The original code left ``c``
    ### undefined (NameError at fit time) for any other value of ``copula``.
    if copula == "Gumbel":
        c = gumbel.Gumbel()
    elif copula == "Normal":
        c = GaussianMultivariate()
    else:
        raise ValueError(
            "copula must be 'Gumbel' or 'Normal', got %r" % (copula,))

    ### Transforming the np.array to a dataframe
    df = pd.DataFrame(errors, columns=['res1', 'res2'])

    ### Transforming the errors series such that there is no 0 or 1
    # because this is leading to problems when using the copula
    df = pd.DataFrame(np.where(df == 0, 0.00000001,
                               np.where(df == 1, 0.99999999, df)),
                      columns=df.columns)
    scaler = MinMaxScaler()
    df = pd.DataFrame(scaler.fit_transform(df.values), columns=df.columns)

    ### Fitting the copula
    c.fit(df.values)

    ### Generating a sample of the same size and undoing the min-max scaling
    synthetic = c.sample(len(df))
    synthetic = scaler.inverse_transform(synthetic)

    return synthetic
Exemplo n.º 15
0
def copula_based(X,Y):
    """
    Calculate joint PDF/CDF at the mean of (X, Y) using a Gaussian copula.
    """
    import pandas as pd
    from copulas.multivariate import GaussianMultivariate

    # Fit a gaussian copula to the paired data.
    frame = pd.DataFrame(list(zip(X, Y)), columns=['P', 'T'])
    dist = GaussianMultivariate()
    dist.fit(frame)

    # Build a single-row frame holding the mean of each margin.
    sampled = dist.sample(1)
    sampled.at[0, 'P'] = np.mean(X)
    sampled.at[0, 'T'] = np.mean(Y)

    # Evaluate the joint density and distribution at that mean point.
    pdf = dist.pdf(sampled)
    cdf = dist.cumulative_distribution(sampled)
    return [pdf, cdf]
Exemplo n.º 16
0
def fit_and_sample(lagged_zvalues: [[float]], num: int, copula=None):
    """ Example of creating a "sample" of future values

           lagged_zvalues:     [ [z1,z2,z3] ]  distributed N(0,1) margins, roughly
           copula :            Something from https://pypi.org/project/copulas/
           returns:            [ [z1, z2, z3] ]  representative sample

        Swap out this function for whatever you like.
    """
    # Remark 1: It's lazy to just sample synthetic data
    # Remark 2: Any multivariate density estimation could go here.
    # Remark 3: If you prefer uniform margin, use mw.get_lagged_copulas(name=name, count= 5000)
    #
    # See https://www.microprediction.com/blog/lottery for discussion of this "game"
    frame = pd.DataFrame(data=lagged_zvalues)
    if copula is None:
        copula = GaussianMultivariate()  # <---
    copula.fit(frame)
    return copula.sample(num).values.tolist()
Exemplo n.º 17
0
def testMITCopulas():
    """Smoke-test the copulas package: fit, sample and visually compare."""
    import warnings
    warnings.filterwarnings('ignore')

    from copulas.datasets import sample_trivariate_xyz
    from copulas.multivariate import GaussianMultivariate
    from copulas.visualization import compare_3d

    # Three mutually dependent columns.
    real_data = sample_trivariate_xyz()

    # Fit a gaussian copula and draw a same-sized synthetic sample.
    copula = GaussianMultivariate()
    copula.fit(real_data)
    synthetic_data = copula.sample(len(real_data))

    # Plot the real and the synthetic data side by side.
    compare_3d(real_data, synthetic_data)
    return True
Exemplo n.º 18
0
    def test_cdf(self):
        """Check the CDF is strictly inside (0, 1) and non-decreasing.

        The monotonicity check fixes every column except one to a constant
        value taken from a random row, sorts along the remaining column, and
        verifies the CDF never decreases.
        """
        data = sample_trivariate_xyz()
        model = GaussianMultivariate()
        model.fit(data)

        sampled_data = model.sample(10)

        # Test CDF
        cdf = model.cumulative_distribution(sampled_data)
        assert (0 < cdf).all() and (cdf < 1).all()

        # Test CDF increasing function
        for column in sampled_data.columns:
            sorted_data = sampled_data.sort_values(column)
            other_columns = data.columns.to_list()
            other_columns.remove(column)
            row = sorted_data.sample(1).iloc[0]
            # Fix: use a distinct loop variable here; the original shadowed
            # the outer ``column``, which is error-prone on later edits.
            for other_column in other_columns:
                sorted_data[other_column] = row[other_column]

            cdf = model.cumulative_distribution(sorted_data)
            assert (np.diff(cdf) >= 0).all()
Exemplo n.º 19
0
def fit_and_sample(lagged_zvalues: [[float]], num: int, copula=None):
    """ Example of fitting a copula function, and sampling
           lagged_zvalues: [ [z1,z2,z3] ]  distributed N(0,1) margins, roughly
           copula : Something from https://pypi.org/project/copulas/
           returns: [ [z1, z2, z3] ]  representative sample

    """
    # This is the part you'll want to change.
    # Remark 1: It's lazy to just sample synthetic data
    # Some more evenly spaced sampling would be preferable.
    # Remark 2: Any multivariate density estimation could go here.
    # Remark 3: If you want to literally fit to a Copula (i.e. roughly uniform margins)
    # then you might want to use mw.get_lagged_copulas(name=name, count= 5000) instead
    #
    # See https://www.microprediction.com/blog/lottery for discussion of why evenly
    # spaced samples are likely to serve you better.
    frame = pd.DataFrame(data=lagged_zvalues)
    if copula is None:
        copula = GaussianMultivariate()  # <---
    copula.fit(frame)
    return copula.sample(num).values.tolist()
Exemplo n.º 20
0
class GaussianCopula(SDVModel):
    """Model wrapping ``copulas.multivariate.GaussianMultivariate`` copula.

    Args:
        distribution (copulas.univariate.Univariate or str):
            Copulas univariate distribution to use.

    Example:
        The example below shows simple usage case where a ``GaussianMultivariate``
        is being created and its ``fit`` and ``sample`` methods are being called.

        >>> model = GaussianMultivariate()
        >>> model.fit(pd.DataFrame({'a_field': list(range(10))}))
        >>> model.sample(5)
            a_field
        0  4.796559
        1  7.395329
        2  7.400417
        3  2.794212
        4  1.925887
    """

    # Default univariate distribution class used when none is provided.
    DISTRIBUTION = GaussianUnivariate
    # Univariate distribution (class, instance, str or dict) for this instance.
    distribution = None
    # Fitted ``GaussianMultivariate`` instance; set by ``fit``/``set_parameters``.
    model = None

    def __init__(self, distribution=None):
        """Initialize with the given univariate distribution (or the default)."""
        self.distribution = distribution or self.DISTRIBUTION

    def fit(self, table_data):
        """Fit the model to the table.

        Impute the table data before fit the model.

        Args:
            table_data (pandas.DataFrame):
                Data to be fitted.
        """
        table_data = impute(table_data)
        self.model = GaussianMultivariate(distribution=self.distribution)
        self.model.fit(table_data)

    def sample(self, num_samples):
        """Sample ``num_samples`` rows from the model.

        Args:
            num_samples (int):
                Amount of rows to sample.

        Returns:
            pandas.DataFrame:
                Sampled data with the number of rows specified in ``num_samples``.
        """
        return self.model.sample(num_samples)

    def get_parameters(self):
        """Get copula model parameters.

        Compute model ``covariance`` and ``distribution.std``
        before it returns the flatten dict.

        Returns:
            dict:
                Copula flatten parameters.
        """
        # Keep only the lower triangle of the (symmetric) covariance matrix;
        # ``_prepare_sampled_covariance`` rebuilds the full matrix later.
        values = list()
        triangle = np.tril(self.model.covariance)

        for index, row in enumerate(triangle.tolist()):
            values.append(row[:index + 1])

        self.model.covariance = np.array(values)
        params = self.model.to_dict()
        univariates = dict()
        # Re-key the univariate parameter dicts by column name.
        for name, univariate in zip(params.pop('columns'),
                                    params['univariates']):
            univariates[name] = univariate
            if 'scale' in univariate:
                scale = univariate['scale']
                if scale == 0:
                    # Guard against log(0); EPSILON is a tiny positive constant.
                    scale = EPSILON

                # Store scale on log-scale so that any (possibly negative)
                # sampled value maps back to a positive scale via ``np.exp``
                # in ``_unflatten_gaussian_copula``.
                univariate['scale'] = np.log(scale)

        params['univariates'] = univariates

        return flatten_dict(params)

    def _prepare_sampled_covariance(self, covariance):
        """Prepare a covariance matrix.

        Args:
            covariance (list):
                covariance after unflattening model parameters.

        Result:
            list[list]:
                symmetric Positive semi-definite matrix.
        """
        # Mirror the lower triangle: A + A.T double-counts the diagonal,
        # so subtract it once via the identity mask.
        covariance = np.array(square_matrix(covariance))
        covariance = (covariance + covariance.T -
                      (np.identity(covariance.shape[0]) * covariance))

        if not check_matrix_symmetric_positive_definite(covariance):
            covariance = make_positive_definite(covariance)

        return covariance.tolist()

    def _unflatten_gaussian_copula(self, model_parameters):
        """Prepare unflattened model params to recreate Gaussian Multivariate instance.

        The preparations consist basically in:

            - Transform sampled negative standard deviations from distributions into positive
              numbers

            - Ensure the covariance matrix is a valid symmetric positive-semidefinite matrix.

            - Add string parameters kept inside the class (as they can't be modelled),
              like ``distribution_type``.

        Args:
            model_parameters (dict):
                Sampled and reestructured model parameters.

        Returns:
            dict:
                Model parameters ready to recreate the model.
        """
        univariate_kwargs = {'type': model_parameters['distribution']}

        columns = list()
        univariates = list()
        for column, univariate in model_parameters['univariates'].items():
            columns.append(column)
            univariate.update(univariate_kwargs)
            # Undo the log transform applied in ``get_parameters``.
            univariate['scale'] = np.exp(univariate['scale'])
            univariates.append(univariate)

        model_parameters['univariates'] = univariates
        model_parameters['columns'] = columns

        covariance = model_parameters.get('covariance')
        model_parameters['covariance'] = self._prepare_sampled_covariance(
            covariance)

        return model_parameters

    def set_parameters(self, parameters):
        """Set copula model parameters.

        Add additional keys after unflattening the parameters
        in order to set expected parameters for the copula.

        Args:
            dict:
                Copula flatten parameters.
        """
        parameters = unflatten_dict(parameters)
        parameters.setdefault('fitted', True)
        parameters.setdefault('distribution', self.distribution)

        parameters = self._unflatten_gaussian_copula(parameters)

        self.model = GaussianMultivariate.from_dict(parameters)
Exemplo n.º 21
0
# Exploratory script: compare a Gumbel copula on toy data with a Gaussian
# copula on the real data. NOTE(review): ``df0``, ``uniformise_normal_data``,
# ``df``, ``gumbel`` and ``sns`` are defined elsewhere in the full script —
# verify before running this fragment.
df_uniform = uniformise_normal_data(df0)

pd.DataFrame(df_uniform[:,0]).hist()  # sanity check: first margin histogram

# Toy perfectly-correlated bivariate sample from a single normal draw.
test = np.random.normal(size=600)
dd = pd.DataFrame({'t' : test , 'a':test })
pd.Series(test).hist()

# Fit a Gumbel copula to the toy data.
c = gumbel.Gumbel()
c.fit(dd)

# Fit a Gaussian copula to the real data and sample the same number of rows.
c2 = GaussianMultivariate()
c2.fit(df0)

c2.sample(len(df0))

# Inspect the fitted Gumbel copula's density and distribution on ``df``.
pd.Series(c.probability_density(df.values)).hist()
pd.Series(c.cumulative_distribution(df.values)).hist()

c.partial_derivative(df.values)



c.compute_theta()



synthetic = c.sample(len(df))

# Joint KDE plot of the first two columns of the original data.
h = sns.jointplot(df.iloc[:, 0], df.iloc[:, 1], kind='kde', stat_func=None)
Exemplo n.º 22
0
# Cross-correlation scores against reference clock genes (helpers defined
# earlier in the full script — not visible in this fragment).
cross_indices, cross_clock_genes, cross_scores = FourierClock.cross_corr(
    X_data, Y_copy, X_ID)
cross_scores = np.abs(np.array(cross_scores))

# Stack four per-gene score columns: autocorrelation, cross-correlation,
# ARSER FDR (fdr_BH) and JTK adjusted p-value (ADJ.P).
scores = np.concatenate((auto_scores.reshape(-1, 1), cross_scores.reshape(
    -1, 1), arser_scores['fdr_BH'].values.reshape(
        -1, 1), jtk_scores['ADJ.P'].values.reshape(-1, 1)),
                        axis=1)

# Flip the last two (p-value-like) columns so larger always means better.
scores[:, 2:] = 1 - scores[:, 2:]

num_resamples = 1000  # Change to 50,000/100,000

# Fit a Gaussian copula to the joint score distribution, resample it, and
# approximate the resample with a multivariate normal for a cheap CDF.
gcopula = GaussianMultivariate()
gcopula.fit(scores)
random_sample = gcopula.sample(num_resamples)
sample_scores = pd.DataFrame(random_sample)
mean = np.mean(sample_scores.values, axis=0)
covariance = np.cov(sample_scores.T)
dist = mvn(mean=mean, cov=covariance, allow_singular=True)

# Score each gene by the CDF of its score vector under that distribution.
gene_scores = []
for i in range(scores.shape[0]):
    gene_scores.append(dist.cdf(x=scores[i, :]))

# Pair each gene ID with its combined score.
gene_scores = np.array(gene_scores)
gene_scores = np.concatenate(
    (arser_scores['CycID'].values.reshape(-1, 1), gene_scores.reshape(-1, 1)),
    axis=1)

# Rank genes from highest to lowest combined score.
gene_scores = gene_scores[gene_scores[:, 1].argsort()[::-1]]
Exemplo n.º 23
0
#!/usr/bin/env python
'''
Given a tabular dataset, fit a copula to it.

Reads ``samples.csv``, fits a Gaussian copula to three of its columns,
samples a same-sized synthetic dataset and plots both for comparison.
'''

import matplotlib.pyplot as plt
import pandas as pd
from copulas.multivariate import GaussianMultivariate
from copulas.visualization import compare_3d

# Load the input table and select the columns to model.
df = pd.read_csv('samples.csv')
cols = ['x1', 'x2', 'x3']

copula = GaussianMultivariate()
copula.fit(df[cols])

# generate synthetic data from our fit (same number of rows as the input)
sd = copula.sample(df.shape[0])

# Side-by-side 3D comparison of real vs synthetic data.
compare_3d(df[cols], sd)
plt.show()
Exemplo n.º 24
0
g.map_offdiag(sns.scatterplot)

# One-step-ahead variance forecasts from fitted volatility models
# (``rst_sp``/``rst_tn`` come from earlier in the full script — presumably
# `arch` model results; verify upstream).
var_sp = rst_sp.forecast().variance.dropna().values[0]
var_tn = rst_tn.forecast().variance.dropna().values[0]

# Convert variances to volatilities.
vol_sp = np.sqrt(var_sp)
vol_tn = np.sqrt(var_tn)

vol = np.array([vol_sp, vol_tn])

n = 10000  # number of copula draws

## copulas package

# Fit a Gaussian copula to the filtered returns and inspect its covariance.
gaus_cop1 = GaussianMultivariate()
gaus_cop1.fit(filtered_returns)

print(gaus_cop1.covariance)

# Scale sampled returns by the forecast volatilities.
samples1 = gaus_cop1.sample(n).values
scale_samples1 = samples1 * vol.T

# Equal-weighted portfolio return of the two assets.
sim_returns1 = 0.5 * (scale_samples1[:, 0] + scale_samples1[:, 1])

sorted_returns1 = np.sort(sim_returns1)

sns.distplot(sorted_returns1)

# 1% Value-at-Risk and Expected Shortfall (mean of the worst 100 of 10,000).
var = stats.scoreatpercentile(sorted_returns1, 1)
es = np.mean(sorted_returns1[:100])
Exemplo n.º 25
0
class Sample:
    def __init__(self, load, mode, pv_connection, ev_connection, ev_max_connection, pld_pred):
        """Store the sampling configuration for load/PV/EV scenario generation.

        Args:
            load: per-bus base load profiles; converted to a DataFrame with
                one column per bus.
            mode: integer (0-4) selecting the sampling strategy used by the
                ``get_load_sample_*`` methods.
            pv_connection: buses that have photovoltaic generation attached.
            ev_connection: buses that have EV charging attached.
            ev_max_connection: stored as-is; not used in the methods visible
                here — TODO confirm usage.
            pld_pred: predicted price series used by ``get_load_sample_rtp``.
        """
        self.load = pd.DataFrame(load)
        self.mode = mode
        # Single reusable Gaussian copula, re-fitted before each correlated draw.
        self.copula = GaussianMultivariate()
        self.pv_connection = pv_connection
        self.ev_connection = ev_connection
        self.ev_max_connection = ev_max_connection
        self.pld_pred = pld_pred

    def get_load_sample_nrtp(self, bus, pv_curve, ev_curve):
        """Sample per-bus load profiles (non-real-time-pricing case).

        Depending on ``self.mode``:
            0 - independent normal noise around the base load of each bus;
            1 - normal noise per bus, then one copula fit across all buses;
            2 - normal noise, with PV generation correlated in per PV bus;
            3 - normal noise, with EV charging correlated in per EV bus;
            4 - normal noise, with both PV and EV where connected.

        Args:
            bus: number of buses to sample.
            pv_curve / ev_curve: base PV / EV curves, expanded into hourly
                profiles via the ``*_total_curve`` lookup tables below.

        Returns:
            dict of sampled load series keyed by bus index (a DataFrame in
            mode 1, where the copula sample replaces the whole dict).
        """
        load = {}
        if self.mode == 0:
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n],
                                           SD_LOAD)  # normal distribution [Morshed, 2018]  [unit: kWh]
        elif self.mode == 1:
            # Fit one copula over all buses to preserve inter-bus correlation.
            for n in range(bus):
                load_aux = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                load[n] = [float(item) for item in load_aux]
            df = pd.DataFrame.from_dict(load)
            self.copula.fit(df)
            load = self.copula.sample(len(df))
        elif self.mode == 2:
            # Correlate each PV bus's load with its hourly PV profile.
            load_aux = {}
            pv_total_curve = {0: pv_curve[0], 1: pv_curve[1], 2: pv_curve[2], 3: pv_curve[2], 4: pv_curve[3],
                              5: pv_curve[4], 6: pv_curve[5], 7: pv_curve[5], 8: pv_curve[5], 9: pv_curve[6],
                              10: pv_curve[7], 11: pv_curve[7], 12: pv_curve[7], 13: pv_curve[7],
                              14: pv_curve[8], 15: pv_curve[8], 16: pv_curve[8], 17: pv_curve[9],
                              18: pv_curve[9], 19: pv_curve[10], 20: pv_curve[11], 21: pv_curve[12],
                              22: pv_curve[13]}
            pv_number = 0
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                if n in self.pv_connection:
                    load_aux[0] = [float(item) for item in load[n]]
                    pv_aux = pv_total_curve[pv_number]
                    load_aux[1] = [float(item) for item in pv_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[0]
                    pv_number += 1
        elif self.mode == 3:
            # Correlate each EV bus's load with its hourly EV charging profile.
            load_aux = {}
            ev_total_curve = {0: ev_curve[0], 1: ev_curve[0], 2: ev_curve[0], 3: ev_curve[1], 4: ev_curve[1],
                              5: ev_curve[1], 6: ev_curve[1], 7: ev_curve[2], 8: ev_curve[2], 9: ev_curve[3],
                              10: ev_curve[3], 11: ev_curve[3], 12: ev_curve[3], 13: ev_curve[3],
                              14: ev_curve[3], 15: ev_curve[3], 16: ev_curve[4]}
            ev_number = 0
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                if n in self.ev_connection:
                    load_aux[0] = [float(item) for item in load[n]]
                    ev_aux = ev_total_curve[ev_number]
                    load_aux[1] = [float(item) for item in ev_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[0]
                    ev_number += 1
        elif self.mode == 4:
            # Combine modes 2 and 3: correlate with PV, EV, or both per bus.
            load_aux = {}
            pv_total_curve = {0: pv_curve[0], 1: pv_curve[1], 2: pv_curve[2], 3: pv_curve[2], 4: pv_curve[3],
                              5: pv_curve[4], 6: pv_curve[5], 7: pv_curve[5], 8: pv_curve[5], 9: pv_curve[6],
                              10: pv_curve[7], 11: pv_curve[7], 12: pv_curve[7], 13: pv_curve[7],
                              14: pv_curve[8], 15: pv_curve[8], 16: pv_curve[8], 17: pv_curve[9],
                              18: pv_curve[9], 19: pv_curve[10], 20: pv_curve[11], 21: pv_curve[12],
                              22: pv_curve[13]}
            ev_total_curve = {0: ev_curve[0], 1: ev_curve[0], 2: ev_curve[0], 3: ev_curve[1], 4: ev_curve[1],
                              5: ev_curve[1], 6: ev_curve[1], 7: ev_curve[2], 8: ev_curve[2], 9: ev_curve[3],
                              10: ev_curve[3], 11: ev_curve[3], 12: ev_curve[3], 13: ev_curve[3],
                              14: ev_curve[3], 15: ev_curve[3], 16: ev_curve[4]}
            pv_number = 0
            ev_number = 0
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                if (n in self.pv_connection) and (n in self.ev_connection):
                    load_aux[0] = [float(item) for item in load[n]]
                    pv_aux = pv_total_curve[pv_number]
                    load_aux[1] = [float(item) for item in pv_aux]
                    ev_aux = ev_total_curve[ev_number]
                    load_aux[2] = [float(item) for item in ev_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[0]
                    pv_number += 1
                    ev_number += 1
                elif n in self.pv_connection:
                    load_aux[0] = [float(item) for item in load[n]]
                    pv_aux = pv_total_curve[pv_number]
                    load_aux[1] = [float(item) for item in pv_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[0]
                    pv_number += 1
                elif n in self.ev_connection:
                    load_aux[0] = [float(item) for item in load[n]]
                    ev_aux = ev_total_curve[ev_number]
                    load_aux[1] = [float(item) for item in ev_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[0]
                    ev_number += 1

        return load

    def get_load_sample_rtp(self, bus, pv_curve, ev_curve):
        load = {}
        load_aux = {}
        load_aux[0] = [float(-item) for item in self.pld_pred]
        if self.mode == 0:
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n],
                                           SD_LOAD)  # distribuição normal [Morshed, 2018]  [Unidade: kWh]
                load_aux[1] = [float(item) for item in load[n]]
                df = pd.DataFrame.from_dict(load_aux)
                self.copula.fit(df)
                load_sample = self.copula.sample(len(df))
                load[n] = load_sample[1]
        elif self.mode == 1:
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                load_aux[1] = [float(item) for item in load[n]]
                df = pd.DataFrame.from_dict(load_aux)
                self.copula.fit(df)
                load_sample = self.copula.sample(len(df))
                load[n] = load_sample[1]
            df = pd.DataFrame.from_dict(load)
            self.copula.fit(df)
            load = self.copula.sample(len(df))
        elif self.mode == 2:
            pv_total_curve = {0: pv_curve[0], 1: pv_curve[1], 2: pv_curve[2], 3: pv_curve[2], 4: pv_curve[3],
                              5: pv_curve[4], 6: pv_curve[5], 7: pv_curve[5], 8: pv_curve[5], 9: pv_curve[6],
                              10: pv_curve[7], 11: pv_curve[7], 12: pv_curve[7], 13: pv_curve[7],
                              14: pv_curve[8], 15: pv_curve[8], 16: pv_curve[8], 17: pv_curve[9],
                              18: pv_curve[9], 19: pv_curve[10], 20: pv_curve[11], 21: pv_curve[12],
                              22: pv_curve[13]}
            pv_number = 0
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                if n in self.pv_connection:
                    load_aux[1] = [float(item) for item in load[n]]
                    pv_aux = pv_total_curve[pv_number]
                    load_aux[2] = [float(item) for item in pv_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[1]
                    pv_number += 1
        elif self.mode == 3:
            ev_total_curve = {0: ev_curve[0], 1: ev_curve[0], 2: ev_curve[0], 3: ev_curve[1], 4: ev_curve[1],
                              5: ev_curve[1], 6: ev_curve[1], 7: ev_curve[2], 8: ev_curve[2], 9: ev_curve[3],
                              10: ev_curve[3], 11: ev_curve[3], 12: ev_curve[3], 13: ev_curve[3],
                              14: ev_curve[3], 15: ev_curve[3], 16: ev_curve[4]}
            ev_number = 0
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                if n in self.ev_connection:
                    load_aux[1] = [float(item) for item in load[n]]
                    ev_aux = ev_total_curve[ev_number]
                    load_aux[2] = [float(item) for item in ev_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[1]
                    ev_number += 1
        elif self.mode == 4:
            pv_total_curve = {0: pv_curve[0], 1: pv_curve[1], 2: pv_curve[2], 3: pv_curve[2], 4: pv_curve[3],
                              5: pv_curve[4], 6: pv_curve[5], 7: pv_curve[5], 8: pv_curve[5], 9: pv_curve[6],
                              10: pv_curve[7], 11: pv_curve[7], 12: pv_curve[7], 13: pv_curve[7],
                              14: pv_curve[8], 15: pv_curve[8], 16: pv_curve[8], 17: pv_curve[9],
                              18: pv_curve[9], 19: pv_curve[10], 20: pv_curve[11], 21: pv_curve[12],
                              22: pv_curve[13]}
            ev_total_curve = {0: ev_curve[0], 1: ev_curve[0], 2: ev_curve[0], 3: ev_curve[1], 4: ev_curve[1],
                              5: ev_curve[1], 6: ev_curve[1], 7: ev_curve[2], 8: ev_curve[2], 9: ev_curve[3],
                              10: ev_curve[3], 11: ev_curve[3], 12: ev_curve[3], 13: ev_curve[3],
                              14: ev_curve[3], 15: ev_curve[3], 16: ev_curve[4]}
            pv_number = 0
            ev_number = 0
            for n in range(bus):
                load[n] = np.random.normal(self.load.iloc[:, n], SD_LOAD)
                if (n in self.pv_connection) and (n in self.ev_connection):
                    load_aux[1] = [float(item) for item in load[n]]
                    pv_aux = pv_total_curve[pv_number]
                    load_aux[2] = [float(item) for item in pv_aux]
                    ev_aux = ev_total_curve[ev_number]
                    load_aux[3] = [float(item) for item in ev_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[1]
                    pv_number += 1
                    ev_number += 1
                elif n in self.pv_connection:
                    load_aux[1] = [float(item) for item in load[n]]
                    pv_aux = pv_total_curve[pv_number]
                    load_aux[2] = [float(item) for item in pv_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[1]
                    pv_number += 1
                elif n in self.ev_connection:
                    load_aux[1] = [float(item) for item in load[n]]
                    ev_aux = ev_total_curve[ev_number]
                    load_aux[2] = [float(item) for item in ev_aux]
                    df = pd.DataFrame.from_dict(load_aux)
                    self.copula.fit(df)
                    load_sample = self.copula.sample(len(df))
                    load[n] = load_sample[1]
                    ev_number += 1

        return load

    # PV GENERATED POWER FUNCTION
    def get_pv_sample(self, bus):
        """Build the 24-hour PV generation curve (kW) for each bus.

        The hourly solar-radiation profile is a deterministic beta PDF
        evaluated on 24 points [Yaotang, 2016] scaled by R_FACTOR, so it is
        identical for every bus; it is therefore computed once (hoisted out
        of the per-bus loop) and each bus receives its own copy of the
        resulting power curve.

        :param bus: number of buses to generate curves for
        :return: dict mapping bus index -> list of 24 hourly PV power values (kW)
        """
        # Solar radiation probability-density profile (deterministic).
        radiation = ss.beta.pdf(np.linspace(0, 1, 24), ALFA_PV, BETA_PV, 0, 1) * R_FACTOR
        # Piecewise radiation -> generated power (kW) conversion.
        pv = [0] * 24
        for n in range(24):
            if 0 <= radiation[n] < R_CERTAIN_POINT:
                # Quadratic region below the "certain point" radiation level.
                pv[n] = PV_POWER_GENERATION * (radiation[n] ** 2 / (R_CERTAIN_POINT * R_STANDARD_CONDITION))
            elif R_CERTAIN_POINT <= radiation[n] < R_STANDARD_CONDITION:
                # Linear region up to standard test conditions.
                pv[n] = PV_POWER_GENERATION * (radiation[n] / R_STANDARD_CONDITION)
            elif radiation[n] >= R_STANDARD_CONDITION:
                # Saturated at rated power.
                pv[n] = PV_POWER_GENERATION
        # Independent copy per bus so callers may mutate curves separately.
        return {i: list(pv) for i in range(bus)}

    # CONFIGURING THE EV SAMPLE
    def get_ev_sample(self, bus, mode):
        """Sample per-bus 24-hour EV charging curves and charging power.

        For each bus a random number of arriving EVs is drawn; each EV gets a
        random charge duration and start hour, producing a 0/1 hourly
        occupancy curve summed into the bus curve, while charging power is
        accumulated as energy / duration per EV.

        :param bus: number of buses to sample
        :param mode: when mode == 1 the accumulated curve is negated and a
            random discharge block (hours 8-15) is added on top
        :return: tuple (ev_curve, ev_power, ev_incoming, ev_t_duration,
            EV_BATTERY_CAPACITY); ev_curve and ev_power are dicts keyed by
            bus index
        """
        ev_curve = {}
        # NOTE(review): ev_curve_aux and ev_power_aux are initialised once and
        # never reset inside the per-bus loop below, so each bus's stored
        # curve/power also contains the contributions of every previous bus.
        # Confirm this cross-bus accumulation is intentional and not a
        # missing per-bus reset.
        ev_curve_aux = [0] * 24
        ev_power = {}
        ev_power_aux = 0
        ev_incoming = [0] * bus
        ev_t_duration = []
        for bus_i in range(bus):
            # Number of arriving EVs: uniform between one third of the bus
            # connection limit and the limit itself.
            ev_incoming[bus_i] = np.random.randint(int(self.ev_max_connection[bus_i] / 3),
                                                   self.ev_max_connection[bus_i])
            for ev_i in range(ev_incoming[bus_i]):
                # State-of-charge values come from an external helper; exact
                # semantics of soc_init / soc_min / soc_hini are defined in
                # get_ev_soc() — soc_min is unused here.
                soc_init, soc_min, soc_hini = get_ev_soc()
                # Draw a positive charge duration (hours): uniform 1-5, or a
                # rounded normal centred at 3 h or 6 h; redraw until > 0.
                t_duration_charge = 0
                while t_duration_charge <= 0:
                    choice = np.random.randint(1, 4)
                    if choice == 1:
                        t_duration_charge = np.random.randint(1, 6)
                    elif choice == 2:
                        t_duration_charge = round(np.random.normal(3, 0.50))
                    else:
                        t_duration_charge = round(np.random.normal(6, 0.75))
                ev_t_duration.append(t_duration_charge)
                # Arrival hour ~ Normal(MU_EV_HOUR_ARRIVE, SD_EV_HOUR),
                # redrawn while above 24.
                # NOTE(review): only the upper bound is rejected — draws <= 0
                # are accepted, in which case [0] * (t_start_charge - 1)
                # below degenerates to an empty prefix and charging starts at
                # hour 0; confirm whether a lower bound of 1 was intended.
                t_start_charge = int(np.random.normal(MU_EV_HOUR_ARRIVE, SD_EV_HOUR))
                while t_start_charge > 24:
                    t_start_charge = int(np.random.normal(MU_EV_HOUR_ARRIVE, SD_EV_HOUR))
                # Build the 0/1 hourly occupancy curve: zeros until the start
                # hour, then ones for the charge duration.
                curve = [0] * (t_start_charge - 1)
                curve.extend([1] * t_duration_charge)
                if len(curve) < 24:
                    curve.extend([0] * (24 - len(curve)))
                else:
                    # Charging that runs past midnight wraps around onto the
                    # first hours of the same day.
                    curve_aux = curve[24:]
                    n = len(curve_aux)
                    for i in range(n):
                        curve[i] = curve_aux[i]
                ev_curve_aux = ev_curve_aux + np.asarray(curve[0:24])
                # Energy to deliver and the implied constant charging power.
                energy = (soc_init - soc_hini) * EV_BATTERY_CAPACITY
                ev_power_aux = ev_power_aux + energy / t_duration_charge
            if mode == 1:
                # Discharge mode: negate the charging curve, then add a random
                # discharge block (values 5-14) over hours 8-15, flanked by
                # zeros for the remaining hours.
                ev_curve_aux = [-item for item in ev_curve_aux]
                charge_time = np.zeros(8)
                discharge_time = np.random.randint(5, 15, 8)
                discharge_curve = np.concatenate((charge_time, discharge_time))
                discharge_curve = np.concatenate((discharge_curve, charge_time))
                ev_curve_aux = ev_curve_aux + discharge_curve
            ev_curve[bus_i] = ev_curve_aux
            ev_power[bus_i] = ev_power_aux

        return ev_curve, ev_power, ev_incoming, ev_t_duration, EV_BATTERY_CAPACITY