def test_weighted_combined_copula3d(self):
    """Compare a student/gaussian weighted mix with its pure components.

    A weighted combination made of two identical copulas must give the same
    log-likelihood as that copula alone (7 decimal places), and the mixed
    model's log-likelihood must lie strictly between the pure student one
    and the pure gaussian one.
    """
    dimkeys = ["solar", "wind", "tide"]
    mean_vector = [0, 0, 0]
    cov_matrix = [
        [1, 0.1, 0.3],
        [0.1, 2, 0],
        [0.3, 0, 3],
    ]
    # One normal marginal per dimension, using the diagonal of the covariance.
    marginals = {
        key: UnivariateNormalDistribution(var=cov_matrix[idx][idx], mean=mean_vector[idx])
        for idx, key in enumerate(dimkeys)
    }

    samples = np.random.multivariate_normal(mean_vector, cov_matrix, 10000)
    data_dict = {key: samples[:, idx] for idx, key in enumerate(dimkeys)}

    weights = [0.12, 0.88]
    mixed = WeightedCombinedCopula(
        dimkeys, data_dict, marginals, ['student-copula', 'gaussian-copula'], weights)
    gaussian = GaussianCopula(dimkeys, data_dict, marginals)
    all_gaussian = WeightedCombinedCopula(
        dimkeys, data_dict, marginals, ['gaussian-copula', 'gaussian-copula'], weights)
    all_student = WeightedCombinedCopula(
        dimkeys, data_dict, marginals, ['student-copula', 'student-copula'], weights)
    student = StudentCopula(dimkeys, data_dict, marginals)

    gaussian_ll = gaussian.c_log_likelihood()
    student_ll = student.c_log_likelihood()
    mixed_ll = mixed.c_log_likelihood()

    # Degenerate mixes collapse onto the single copula they are built from.
    self.assertAlmostEqual(all_gaussian.c_log_likelihood(), gaussian_ll, 7)
    self.assertAlmostEqual(all_student.c_log_likelihood(), student_ll, 7)
    # The data are gaussian, so: gaussian > mix > student.
    self.assertGreater(gaussian_ll, mixed_ll)
    self.assertGreater(mixed_ll, student_ll)
def test_with_multinormal_4_dim(self):
    """Print the same 4-d box probability under three gaussian-based models.

    Fits a `MultiNormalDistribution`, a `CVineCopula` and a `DVineCopula`
    (all pair copulae gaussian) on the same sample and prints
    `rect_prob(lowerdict, upperdict)` for each inside a `Timer` block.
    Nothing is asserted; the output is meant to be compared by eye.
    """
    dimkeys = ["solar", "wind", "tide", "geo"]
    ourmean = [0, 0, 0, 0]
    ourcov = [
        [1, 0.1, 0.3, 0.4],
        [0.1, 2, 0, 0],
        [0.3, 0, 3, 0],
        [0.4, 0, 0, 4],
    ]
    marginals = {
        key: UnivariateNormalDistribution(var=ourcov[idx][idx], mean=ourmean[idx])
        for idx, key in enumerate(dimkeys)
    }
    lowerdict = {"solar": -1, "wind": -1, "tide": -1, "geo": -2}
    upperdict = {"solar": 1, "wind": 1, "tide": 1, "geo": 2}

    samples = np.random.multivariate_normal(ourmean, ourcov, 10000)
    data_dict = {key: samples[:, idx] for idx, key in enumerate(dimkeys)}

    # Strictly upper-triangular table: entry [row][col] is set only for col > row.
    size = len(dimkeys)
    pair_copulae_strings = [
        ['gaussian-copula' if col > row else None for col in range(size)]
        for row in range(size)
    ]

    with Timer('MultiNormal'):
        multigaussian = MultiNormalDistribution(dimkeys, input_data=data_dict)
        print(multigaussian.rect_prob(lowerdict, upperdict))

    cvine = CVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
    with Timer('CVine rect_prob calculus'):
        print(cvine.rect_prob(lowerdict, upperdict))

    dvine = DVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
    with Timer('DVine rect_prob calculus'):
        print(dvine.rect_prob(lowerdict, upperdict))
def test_quick_dim_3(self):
    """Smoke-test 3-d C-vine and D-vine copulas with mixed pair copulae.

    Builds both vines on the same gaussian sample and prints their `C` and
    `c` values at one fixed point. Nothing is asserted.
    """
    dimkeys = ["solar", "wind", "tide"]
    mean_vector = [0, 0, 0]
    cov_matrix = [
        [1, 0.1, 0.3],
        [0.1, 2, 0],
        [0.3, 0, 3],
    ]
    marginals = {
        key: UnivariateNormalDistribution(var=cov_matrix[idx][idx], mean=mean_vector[idx])
        for idx, key in enumerate(dimkeys)
    }

    samples = np.random.multivariate_normal(mean_vector, cov_matrix, 10000)
    data_dict = {key: samples[:, idx] for idx, key in enumerate(dimkeys)}

    # Upper-triangular table of pair copula names, one family per pair.
    pair_copulae_strings = [
        [None, 'student-copula', 'frank-copula'],
        [None, None, 'clayton-copula'],
        [None, None, None],
    ]
    point = {"solar": 0.43, "wind": 0.92, "tide": 0.27}

    print('CVine')
    c_vine = CVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
    print(c_vine.C(valuedict=point))
    print(c_vine.c(point))

    print('DVine')
    d_vine = DVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
    print(d_vine.C(valuedict=point))
    print(d_vine.c(point))
def test_with_gaussian_copula_3_dim(self):
    """Check a 3-d gaussian copula against a directly fitted multivariate normal.

    Both models are fitted on the same sample and must agree on the
    probability of the box [-1, 1]^3.
    """
    dimkeys = ["solar", "wind", "tide"]
    ourmean = [0, 0, 0]
    ourcov = [
        [1, 0.1, 0.3],
        [0.1, 2, 0],
        [0.3, 0, 3],
    ]
    marginals = {
        key: UnivariateNormalDistribution(var=ourcov[idx][idx], mean=ourmean[idx])
        for idx, key in enumerate(dimkeys)
    }
    lowerdict = {"solar": -1, "wind": -1, "tide": -1}
    upperdict = {"solar": 1, "wind": 1, "tide": 1}

    data_array = np.random.multivariate_normal(ourmean, ourcov, 1000)
    # The fill loop used to construct a GaussianCopula on every iteration,
    # passing an undefined `pair_copulae_strings` (NameError) together with a
    # partially filled data dict. That call is removed; the loop only needs
    # to map each key to its sample column.
    data_dict = {key: data_array[:, idx] for idx, key in enumerate(dimkeys)}

    multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys,
                                    marginals=marginals, quadstep=0.1)
    multigaussian2 = MultiNormalDistribution(dimkeys, input_data=data_dict)

    self.assertAlmostEqual(multigaussian1.rect_prob(lowerdict, upperdict),
                           multigaussian2.rect_prob(lowerdict, upperdict), 2)
    # Second, looser comparison on a fresh pair of evaluations (kept as in
    # the original test).
    self.assertAlmostEqual(multigaussian1.rect_prob(lowerdict, upperdict),
                           multigaussian2.rect_prob(lowerdict, upperdict), 1)
def test_gaussian_copula(self):
    """Draw a rank histogram of one gaussian copula's samples against another.

    Two 2-d gaussian copulas are fitted on independent samples generated
    with the same correlation (0.5). Points are generated from the second
    copula and handed to `diag(2).rank_histogram` together with the first.
    Nothing is asserted.
    """
    # NOTE(review): another `test_gaussian_copula` is visible in this file;
    # if both belong to the same class the later definition shadows this one.
    n = 10000  # number of points generated for the rank histogram
    dimkeys = ["solar", "wind"]
    ourmean = [2, 3]
    rho = 0.5
    rho2 = 0.5
    ourcov = [[1, rho], [rho, 1]]
    ourcov2 = [[1, rho2], [rho2, 1]]
    marginals = {
        key: UnivariateNormalDistribution(var=ourcov[idx][idx], mean=ourmean[idx])
        for idx, key in enumerate(dimkeys)
    }

    data_array = np.random.multivariate_normal(ourmean, ourcov, 100000)
    data_array2 = np.random.multivariate_normal(ourmean, ourcov2, 100000)
    data_dict = {key: data_array[:, idx] for idx, key in enumerate(dimkeys)}
    data_dict2 = {key: data_array2[:, idx] for idx, key in enumerate(dimkeys)}

    multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys,
                                    marginals=marginals, quadstep=0.001)
    multigaussian2 = GaussianCopula(input_data=data_dict2, dimkeys=dimkeys,
                                    marginals=marginals, quadstep=0.001)

    rank_data = multigaussian2.generates_U(n)
    diag(2).rank_histogram(rank_data, 20, multigaussian1)
def test_quick_dim_2(self):
    """Check that 2-d C-vine, D-vine and gaussian copulas give close `C` values.

    All three models are fitted on the same gaussian sample; their `C`
    values at one point must agree pairwise to 1 decimal place.
    """
    dimkeys = ["solar", "wind"]
    mean_vector = [1, 0.5]
    cov_matrix = [[1, 0.3], [0.3, 2]]
    marginals = {
        key: UnivariateNormalDistribution(var=cov_matrix[idx][idx], mean=mean_vector[idx])
        for idx, key in enumerate(dimkeys)
    }

    samples = np.random.multivariate_normal(mean_vector, cov_matrix, 10000)
    data_dict = {key: samples[:, idx] for idx, key in enumerate(dimkeys)}

    pair_copulae_strings = [
        [None, 'student-copula'],
        [None, None],
    ]
    point = {"solar": 0.96, "wind": 0.87}

    c_vine = CVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
    d_vine = DVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
    gaussian = GaussianCopula(dimkeys, data_dict, marginals)
    # The density is evaluated once and its value discarded, as in the original.
    gaussian.c(point)

    self.assertAlmostEqual(c_vine.C(point), d_vine.C(point), 1)
    self.assertAlmostEqual(gaussian.C(point), d_vine.C(point), 1)
    self.assertAlmostEqual(c_vine.C(point), gaussian.C(point), 1)
def test_gaussian_copula(self):
    """Unfinished test: print `emd_sort` between weakly and strongly correlated samples.

    Two 2-d gaussian samples are drawn with correlations 0.1 and 0.9, a
    gaussian copula is fitted on each, and `emd_sort` is printed for three
    pairings of the raw sample arrays. Nothing is asserted.
    """
    # NOTE(review): another `test_gaussian_copula` is visible in this file;
    # if both belong to the same class this definition shadows the earlier one.
    # not finished yet
    print("Warning test not finished yet")
    dimkeys = ["solar", "wind"]
    ourmean = [2, 3]
    rho = 0.1
    rho2 = 0.9
    ourcov = [[1, rho], [rho, 1]]
    ourcov2 = [[1, rho2], [rho2, 1]]
    marginals = {
        key: UnivariateNormalDistribution(var=ourcov[idx][idx], mean=ourmean[idx])
        for idx, key in enumerate(dimkeys)
    }

    data_array = np.random.multivariate_normal(ourmean, ourcov, 100000)
    data_array2 = np.random.multivariate_normal(ourmean, ourcov2, 100000)
    data_dict = {key: data_array[:, idx] for idx, key in enumerate(dimkeys)}
    data_dict2 = {key: data_array2[:, idx] for idx, key in enumerate(dimkeys)}

    # The fitted copulas are not used below yet; the constructions are kept
    # so that fitting itself is still exercised.
    multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys,
                                    marginals=marginals, quadstep=0.001)
    multigaussian2 = GaussianCopula(input_data=data_dict2, dimkeys=dimkeys,
                                    marginals=marginals, quadstep=0.001)

    print(emd_sort(data_array, data_array))
    print(emd_sort(data_array2, data_array))
    print(emd_sort(data_array2, data_array2))
def initialize(dim=2, precision=None, copula_string='independence-copula'):
    """Build a distribution fitted on a random gaussian sample of dimension `dim`.

    Draws 1000 points from a fixed multivariate normal, looks the
    distribution class up with `distribution_factory(copula_string)` and
    instantiates it as `distr_class(dimkeys, dictin)`.

    Args:
        dim: dimension of the sample; 1, 2 and 3 are supported.
        precision: currently ignored; kept so existing callers still work.
        copula_string: name passed to `distribution_factory`.

    Returns:
        The constructed distribution, or None when `dim` is not 1, 2 or 3
        (in which case nothing is sampled and the factory is not called).
    """
    if dim == 1:
        dimkeys = ["solar"]
        ourmean = [0]
        ourcov = [[2]]
    elif dim == 2:
        dimkeys = ["solar", "wind"]
        ourmean = [3, 4]
        rho = 0.5
        ourcov = [[1, rho], [rho, 1]]
    elif dim == 3:
        dimkeys = ["solar", "wind", "tide"]
        ourmean = [0, 0, 0]
        rho01 = 0.1
        rho02 = 0.3
        rho12 = 0
        ourcov = [
            [1, rho01, rho02],
            [rho01, 2, rho12],
            [rho02, rho12, 3],
        ]
    else:
        # Unsupported dimension: same implicit result as before, made explicit.
        return None

    # Shared tail of the three cases: sample, map each key to its column,
    # then build the requested distribution.
    data_array = np.random.multivariate_normal(ourmean, ourcov, 1000)
    dictin = {key: data_array[:, idx] for idx, key in enumerate(dimkeys)}
    distr_class = distribution_factory(copula_string)
    return distr_class(dimkeys, dictin)
def test_plot(self):
    """Plot points generated by a 3-d D-vine and their projection on a diagonal.

    Generates `n` points with `generates_U`, projects each one with
    `diag(3).proj(point, k)` and shows a 3-d scatter plot: generated points
    in green, projections in red, a black segment joining each pair, and
    the chosen diagonal in blue. Nothing is asserted; `plt.show()` is called
    at the end.
    """
    dimkeys = ["solar", "wind", "tide"]
    ourmean = [0, 0, 0]
    # NOTE(review): this matrix is not positive semi-definite (its
    # determinant is -2.415), so numpy is expected to emit a warning when
    # sampling from it. Values are left untouched here; confirm whether
    # that is intended.
    ourcov = [
        [1, 1.3, 1.2],
        [1.3, 2, 0],
        [1.2, 0, 1.5],
    ]
    marginals = {
        key: UnivariateNormalDistribution(var=ourcov[idx][idx], mean=ourmean[idx])
        for idx, key in enumerate(dimkeys)
    }

    data_array = np.random.multivariate_normal(ourmean, ourcov, 10000)
    data_dict = {key: data_array[:, idx] for idx, key in enumerate(dimkeys)}

    pair_copulae_strings = [
        [None, 'gaussian-copula', 'frank-copula'],
        [None, None, 'gaussian-copula'],
        [None, None, None],
    ]
    mydistr = DVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)

    n = 20  # number of points to display
    U = mydistr.generates_U(n=n)
    d = 3
    diago = diag(d)
    k = 2  # index of the diagonal where you want to project

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(U[:, 0], U[:, 1], U[:, 2], c='g', marker='o')

    for i in range(n):
        P = diago.proj(U[i], k)
        ax.scatter(P[0, 0], P[0, 1], P[0, 2], c='r', marker='o')
        # Segment from the generated point to its projection.
        ax.plot([U[i, 0], P[0, 0]], [U[i, 1], P[0, 1]], [U[i, 2], P[0, 2]], c='k')

    diagonal = diago.list_of_diag[k]
    ax.plot([diagonal[0][0], diagonal[1][0]],
            [diagonal[0][1], diagonal[1][1]],
            [diagonal[0][2], diagonal[1][2]], c='b')

    ax.set_xlabel(dimkeys[0])
    ax.set_ylabel(dimkeys[1])
    ax.set_zlabel(dimkeys[2])
    plt.show()
def test_normal_distribution(self):
    """Plot the rank histogram of standard-normal draws against a normal model.

    Nothing is asserted; the test only builds a `RankHistogram` with 25 as
    its third argument and calls `plot()` on it.
    """
    mu = 0
    sigma = 1
    sample_count = 10000

    model = UnivariateNormalDistribution(0, 1)
    draws = mu + sigma * np.random.randn(sample_count)

    histogram = RankHistogram(model, draws, 25)
    histogram.plot()
def test_gaussian_copula2d(self):
    """Check that the best log-likelihood goes to the matching gaussian copula.

    Two gaussian samples are drawn with correlations 0.5 and 0.7. For each
    sample, a gaussian copula using the generating covariance must score a
    higher `c_log_likelihood` than one forced to the other covariance. The
    gaussian copula fitted on the first sample must also beat the gumbel,
    clayton, frank and student copulas fitted on that same sample.
    """
    dimkeys = ["solar", "wind"]
    ourmean = [2, 3]
    rho = 0.5
    rho2 = 0.7
    ourcov = [[1, rho], [rho, 1]]
    ourcov2 = [[1, rho2], [rho2, 1]]
    marginals = {
        key: UnivariateNormalDistribution(var=ourcov[idx][idx], mean=ourmean[idx])
        for idx, key in enumerate(dimkeys)
    }

    data_array = np.random.multivariate_normal(ourmean, ourcov, 100000)
    data_array2 = np.random.multivariate_normal(ourmean, ourcov2, 100000)
    data_dict = {key: data_array[:, idx] for idx, key in enumerate(dimkeys)}
    data_dict2 = {key: data_array2[:, idx] for idx, key in enumerate(dimkeys)}

    gumbel = GumbelCopula(dimkeys, data_dict, marginals)
    frank = FrankCopula(dimkeys, data_dict, marginals)
    clayton = ClaytonCopula(dimkeys, data_dict, marginals)
    student = StudentCopula(dimkeys, data_dict, marginals)

    # 1: first sample, no covariance passed; 2: first sample, other covariance;
    # 3: second sample, its own covariance;  4: second sample, other covariance.
    multigaussian1 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict,
                                    marginals=marginals, quadstep=0.001)
    multigaussian2 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict,
                                    marginals=marginals, quadstep=0.001, cov=ourcov2)
    multigaussian3 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict2,
                                    marginals=marginals, quadstep=0.001, cov=ourcov2)
    multigaussian4 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict2,
                                    marginals=marginals, quadstep=0.001, cov=ourcov)

    l1 = multigaussian1.c_log_likelihood()
    self.assertGreater(l1, multigaussian2.c_log_likelihood())
    self.assertGreater(multigaussian3.c_log_likelihood(),
                       multigaussian4.c_log_likelihood())
    self.assertGreater(l1, gumbel.c_log_likelihood())
    self.assertGreater(l1, clayton.c_log_likelihood())
    self.assertGreater(l1, frank.c_log_likelihood())
    self.assertGreater(l1, student.c_log_likelihood())
def test_two_dimensions(self):
    """Compare a fitted 2-d normal with one built from the true parameters.

    The two distributions must agree on a box probability to 2 decimal
    places, and the column means of points generated by the fitted one must
    match the true means to 1 decimal place.
    """
    dimkeys = ["solar", "wind"]
    true_mean = [-4, 3]
    true_cov = [[2, 0], [0, 2]]
    lowerdict = {"solar": -1, "wind": 0}
    upperdict = {"solar": 3, "wind": 4}
    # Built as in the original fixture, although neither model below takes it.
    marginals = {
        key: UnivariateNormalDistribution(var=true_cov[idx][idx], mean=true_mean[idx])
        for idx, key in enumerate(dimkeys)
    }

    samples = np.random.multivariate_normal(true_mean, true_cov, 10000)
    data_dict = {key: samples[:, idx] for idx, key in enumerate(dimkeys)}

    fitted = MultiNormalDistribution(dimkeys, input_data=data_dict)
    exact = MultiNormalDistribution(dimkeys, mean=true_mean, cov=true_cov)

    self.assertAlmostEqual(fitted.rect_prob(lowerdict, upperdict),
                           exact.rect_prob(lowerdict, upperdict), 2)
    # Two separate draws: second column first, then first column.
    self.assertAlmostEqual(np.mean(fitted.generates_X(n=1000)[:, 1]), true_mean[1], 1)
    self.assertAlmostEqual(np.mean(fitted.generates_X(n=1000)[:, 0]), true_mean[0], 1)
def test_with_gaussian_copula_1_dim(self):
    """Check a 1-d gaussian copula against a fitted 1-d normal distribution.

    Both are fitted on the same sample and must give the same probability
    for the interval [-2, 1] to 3 decimal places.
    """
    true_mean = 0
    true_var = 2
    dimkeys1 = ["solar"]
    lowerdict = {"solar": -2}
    upperdict = {"solar": 1}

    samples = np.random.multivariate_normal([true_mean], [[true_var]], 10000)
    data_dict1 = {"solar": samples[:, 0]}
    # The marginal is estimated from the sample itself rather than given.
    marginals1 = {"solar": UnivariateNormalDistribution(input_data=samples[:, 0])}

    copula_model = GaussianCopula(input_data=data_dict1, dimkeys=dimkeys1,
                                  marginals=marginals1)
    normal_model = MultiNormalDistribution(dimkeys1, input_data=data_dict1)

    self.assertAlmostEqual(copula_model.rect_prob(lowerdict, upperdict),
                           normal_model.rect_prob(lowerdict, upperdict), 3)
def test_with_mean_var(self):
    """Check `cdf` for a normal built from data and from explicit parameters.

    With mean 3 and standard deviation 2, cdf(4) is Phi(0.5), about 0.6915.
    The data-fitted distribution is checked to 1 decimal place, the
    parametric one to 3.
    """
    sigma = 2
    mean = 3
    expected = 0.6915

    sample = sigma * np.random.randn(10000) + mean
    fitted = UnivariateNormalDistribution(input_data=sample)
    self.assertAlmostEqual(fitted.cdf(4), expected, 1)

    exact = UnivariateNormalDistribution(mean=mean, var=sigma ** 2)
    self.assertAlmostEqual(exact.cdf(4), expected, 3)
def test_with_gaussian_copula_2_dim(self):
    """Check a 2-d gaussian copula against a directly fitted bivariate normal.

    Both models are fitted on the same 100000-point sample and must agree
    on the probability of the box [2, 4] x [3, 5] to 3 decimal places.
    """
    dimkeys = ["solar", "wind"]
    ourmean = [3, 4]
    ourcov = [[1, 0.5], [0.5, 1]]
    marginals = {
        key: UnivariateNormalDistribution(var=ourcov[idx][idx], mean=ourmean[idx])
        for idx, key in enumerate(dimkeys)
    }
    lowerdict = {"solar": 2, "wind": 3}
    upperdict = {"solar": 4, "wind": 5}

    data_array = np.random.multivariate_normal(ourmean, ourcov, 100000)
    data_dict = {key: data_array[:, idx] for idx, key in enumerate(dimkeys)}

    multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys,
                                    marginals=marginals, quadstep=0.001)
    multigaussian2 = MultiNormalDistribution(dimkeys, input_data=data_dict)

    self.assertAlmostEqual(multigaussian1.rect_prob(lowerdict, upperdict),
                           multigaussian2.rect_prob(lowerdict, upperdict), 3)
def test_quick(self):
    """Check that a normal fitted on standard-normal draws puts ~95% in [-1.96, 1.96]."""
    draws = np.random.randn(1000)
    fitted = UnivariateNormalDistribution(input_data=draws)
    central_mass = fitted.rect_prob(-1.96, 1.96)
    # Only 1 decimal place is required.
    self.assertAlmostEqual(central_mass, 0.95, 1)
def test_pdf_cdf(self):
    """Check that integrating `pdf` over [-1, 3] matches `rect_prob(-1, 3)`.

    The distribution is fitted on 2000 draws of -2 + 2 * N(0, 1); the
    numerical integral and `rect_prob` must agree to 5 decimal places.
    """
    sample = -2 + 2 * np.random.randn(2000)
    fitted = UnivariateNormalDistribution(input_data=sample)

    # quad returns (integral, error estimate); only the integral is compared.
    integral, _abs_error = spi.quad(fitted.pdf, -1, 3)
    self.assertAlmostEqual(integral, fitted.rect_prob(-1, 3), 5)