# Example: FFT-based Gaussian kernel density estimate of a simulated sample,
# drawn on top of a histogram of the same sample.
from scipy import stats
import numpy as np
from statsmodels.sandbox.distributions.mixture_rvs import mixture_rvs
from statsmodels.nonparametric import bandwidths
from statsmodels.nonparametric.kde import (kdensity, kdensityfft)
import matplotlib.pyplot as plt

# Fixed seed so the simulated sample is identical on every run.
np.random.seed(12345)

# Request 10000 draws from mixture_rvs with weights .25/.75 over two normal
# distributions (loc=-1 and loc=1, both with scale=.5).
obs_dist = mixture_rvs([.25, .75], size=10000,
                       dist=[stats.norm, stats.norm],
                       kwargs=(dict(loc=-1, scale=.5),
                               dict(loc=1, scale=.5)))
#.. obs_dist = mixture_rvs([.25,.75], size=10000, dist=[stats.norm, stats.beta],
#..            kwargs = (dict(loc=-1,scale=.5),dict(loc=1,scale=1,args=(1,.5))))

# kdensityfft returns the density values, the grid they were evaluated on,
# and the bandwidth; the bandwidth name is rebound further down.
f_hat, grid, bw = kdensityfft(obs_dist, kernel="gauss", bw="scott")

# Check the plot
plt.figure()
# `density=True` normalizes the histogram so it is comparable with f_hat;
# it is the supported spelling of the former `normed=True` keyword.
plt.hist(obs_dist, bins=50, density=True, color='red')
plt.plot(grid, f_hat, lw=2, color='black')
plt.show()

# do some timings
# get bw first because they're not streamlined
bw = bandwidths.bw_scott(obs_dist)
#.. timeit kdensity(obs_dist, kernel="gauss", bw=bw, gridsize=2**10)
#.. timeit kdensityfft(obs_dist, kernel="gauss", bw=bw, gridsize=2**10)
if __name__ == '__main__': examples = ['chebyt', 'fourier', 'hermite']#[2] nobs = 10000 import matplotlib.pyplot as plt from statsmodels.sandbox.distributions.mixture_rvs import ( mixture_rvs, MixtureDistribution) #np.random.seed(12345) ## obs_dist = mixture_rvs([1/3.,2/3.], size=nobs, dist=[stats.norm, stats.norm], ## kwargs = (dict(loc=-1,scale=.5),dict(loc=1,scale=.75))) mix_kwds = (dict(loc=-0.5,scale=.5),dict(loc=1,scale=.2)) obs_dist = mixture_rvs([1/3.,2/3.], size=nobs, dist=[stats.norm, stats.norm], kwargs=mix_kwds) mix = MixtureDistribution() #obs_dist = np.random.randn(nobs)/4. #np.sqrt(2) if "chebyt_" in examples: # needed for Cheby example below #obs_dist = np.clip(obs_dist, -2, 2)/2.01 #chebyt [0,1] obs_dist = obs_dist[(obs_dist>-2) & (obs_dist<2)]/2.0 #/4. + 2/4.0 #fourier [0,1] #obs_dist = obs_dist[(obs_dist>-2) & (obs_dist<2)]/4. + 2/4.0 f_hat, grid, coeffs, polys = density_orthopoly(obs_dist, ChebyTPoly, order=20, xeval=None) #f_hat /= f_hat.sum() * (grid.max() - grid.min())/len(grid) f_hat0 = f_hat from scipy import integrate
import os

import numpy as np
import numpy.testing as npt
from statsmodels.sandbox.distributions.mixture_rvs import mixture_rvs
from statsmodels.nonparametric.kde import KDE
from scipy import stats

# get results from Stata
curdir = os.path.dirname(os.path.abspath(__file__))
rfname = os.path.join(curdir, 'results', 'results_kde.csv')
# Parse inside a context manager so the file handle is closed as soon as the
# reference table has been read, instead of being left open.
with open(rfname, 'rb') as results_file:
    KDEResults = np.genfromtxt(results_file, delimiter=",", names=True)

# setup test data
np.random.seed(12345)  # fixed seed: the sample must match the stored results
Xi = mixture_rvs([.25, .75], size=200, dist=[stats.norm, stats.norm],
                 kwargs=(dict(loc=-1, scale=.5), dict(loc=1, scale=.5)))


class CheckKDE(object):
    """Shared density check for KDE test classes.

    Subclasses are expected to set ``res1`` (an object exposing a
    ``density`` attribute) and ``res_density`` (the reference values)
    before ``test_density`` runs.
    """

    # Number of decimals passed to ``assert_almost_equal``.
    decimal_density = 7

    def test_density(self):
        """Compare the fitted density with the reference density."""
        npt.assert_almost_equal(self.res1.density, self.res_density,
                                self.decimal_density)


class TestKDEGauss(CheckKDE):
    """Gaussian-kernel fit of ``Xi`` checked against the ``gau_d`` column."""

    # NOTE(review): `setupClass` is the nose spelling of the class-level
    # fixture; confirm the active test runner invokes it.
    @classmethod
    def setupClass(cls):
        """Fit the estimator once and store it with its reference values."""
        res1 = KDE(Xi)
        res1.fit(kernel="gau", fft=False, bw="silverman")
        cls.res1 = res1
        cls.res_density = KDEResults["gau_d"]