def setUp(self):
    self.prng = prng = np.random.RandomState(10)
    self.n_components = n_components = 3
    self.n_features = n_features = 3
    self.startprob = prng.rand(n_components)
    self.startprob = self.startprob / self.startprob.sum()
    self.transmat = prng.rand(n_components, n_components)
    self.transmat /= np.tile(self.transmat.sum(axis=1)[:, np.newaxis],
                             (1, n_components))
    self.means = prng.randint(-20, 20, (n_components, n_features))
    self.covars = {
        'spherical': (1.0 + 2 * np.dot(prng.rand(n_components, 1),
                                       np.ones((1, n_features)))) ** 2,
        'tied': (make_spd_matrix(n_features, random_state=0)
                 + np.eye(n_features)),
        'diag': (1.0 + 2 * prng.rand(n_components, n_features)) ** 2,
        'full': np.array([make_spd_matrix(n_features, random_state=0)
                          + np.eye(n_features)
                          for x in range(n_components)]),
    }
    self.expanded_covars = {
        'spherical': [np.eye(n_features) * cov
                      for cov in self.covars['spherical']],
        'diag': [np.diag(cov) for cov in self.covars['diag']],
        'tied': [self.covars['tied']] * n_components,
        'full': self.covars['full'],
    }

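# A minimal standalone sketch (values assumed, not taken from the setUp above)
# of the 'spherical' -> full expansion in expanded_covars: a per-feature
# variance row becomes a diagonal matrix by broadcasting against the identity.
import numpy as np

spherical_row = np.array([1.5, 2.0, 0.7])   # one variance per feature
expanded = np.eye(3) * spherical_row        # variances on the diagonal
assert expanded[1, 1] == 2.0 and expanded[0, 1] == 0.0
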
def __init__(self, rng, n_samples=500, n_components=2, n_features=2,
             scale=50):
    self.n_samples = n_samples
    self.n_components = n_components
    self.n_features = n_features
    self.weights = rng.rand(n_components)
    self.weights = self.weights / self.weights.sum()
    self.means = rng.rand(n_components, n_features) * scale
    self.covariances = {
        'spherical': .5 + rng.rand(n_components),
        'diag': (.5 + rng.rand(n_components, n_features)) ** 2,
        'tied': make_spd_matrix(n_features, random_state=rng),
        'full': np.array([
            make_spd_matrix(n_features, random_state=rng) * .5
            for _ in range(n_components)])}
    self.precisions = {
        'spherical': 1. / self.covariances['spherical'],
        'diag': 1. / self.covariances['diag'],
        'tied': linalg.inv(self.covariances['tied']),
        'full': np.array([linalg.inv(covariance)
                          for covariance in self.covariances['full']])}
    self.X = dict(zip(COVARIANCE_TYPE, [generate_data(
        n_samples, n_features, self.weights, self.means, self.covariances,
        covar_type) for covar_type in COVARIANCE_TYPE]))
    self.Y = np.hstack([np.full(int(np.round(w * n_samples)), k, dtype=int)
                        for k, w in enumerate(self.weights)])

def __init__(self, rng, n_samples=500, n_components=2, n_features=2,
             scale=50):
    self.n_samples = n_samples
    self.n_components = n_components
    self.n_features = n_features
    self.weights = rng.rand(n_components)
    self.weights = self.weights / self.weights.sum()
    self.means = rng.rand(n_components, n_features) * scale
    self.covariances = {
        'spherical': .5 + rng.rand(n_components),
        'diag': (.5 + rng.rand(n_components, n_features)) ** 2,
        'tied': make_spd_matrix(n_features, random_state=rng),
        'full': np.array([
            make_spd_matrix(n_features, random_state=rng) * .5
            for _ in range(n_components)])}
    self.precisions = {
        'spherical': 1. / self.covariances['spherical'],
        'diag': 1. / self.covariances['diag'],
        'tied': linalg.inv(self.covariances['tied']),
        'full': np.array([linalg.inv(covariance)
                          for covariance in self.covariances['full']])}
    self.X = dict(zip(COVARIANCE_TYPE, [generate_data(
        n_samples, n_features, self.weights, self.means, self.covariances,
        covar_type) for covar_type in COVARIANCE_TYPE]))
    self.Y = np.hstack([k * np.ones(int(np.round(w * n_samples)))
                        for k, w in enumerate(self.weights)])

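# Note on the two Y constructions above: both rely on np.round, so the
# per-component counts need not sum exactly to n_samples, and np.round
# resolves .5 ties by rounding half to even. A minimal illustration
# (weights and n_samples here are assumed values):
import numpy as np

weights = np.array([0.25, 0.75])
n_samples = 10
Y = np.hstack([np.full(int(np.round(w * n_samples)), k, dtype=int)
               for k, w in enumerate(weights)])
# round(2.5) -> 2 and round(7.5) -> 8 under round-half-to-even.
assert Y.tolist() == [0, 0] + [1] * 8
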
class GaussianHMMParams(object):
    n_components = 3
    n_features = 3
    startprob = prng.rand(n_components)
    startprob = startprob / startprob.sum()
    # Use the shared seeded prng rather than np.random so the parameters
    # are reproducible.
    transmat = prng.rand(n_components, n_components)
    transmat /= np.tile(transmat.sum(axis=1)[:, np.newaxis],
                        (1, n_components))
    means = prng.randint(-20, 20, (n_components, n_features))
    covars = {
        'spherical': (1.0 + 2 * prng.rand(n_components)) ** 2,
        'tied': (make_spd_matrix(n_features, random_state=0)
                 + np.eye(n_features)),
        'diag': (1.0 + 2 * prng.rand(n_components, n_features)) ** 2,
        'full': np.array([
            make_spd_matrix(n_features, random_state=0) + np.eye(n_features)
            for x in range(n_components)]),
    }
    expanded_covars = {
        'spherical': [np.eye(n_features) * cov
                      for cov in covars['spherical']],
        'diag': [np.diag(cov) for cov in covars['diag']],
        'tied': [covars['tied']] * n_components,
        'full': covars['full'],
    }

def make_covar_matrix(covariance_type, n_components, n_features):
    mincv = 0.1
    rand = np.random.random
    return {
        'spherical': (mincv + mincv * np.dot(rand((n_components, 1)),
                                             np.ones((1, n_features)))) ** 2,
        'tied': (make_spd_matrix(n_features) + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * rand((n_components, n_features))) ** 2,
        'full': np.array([(make_spd_matrix(n_features)
                           + mincv * np.eye(n_features))
                          for x in range(n_components)]),
    }[covariance_type]

def make_covar_matrix(covariance_type, n_components, n_features):
    mincv = 0.1
    rand = np.random.random
    return {
        'spherical': (mincv + mincv * np.dot(rand((n_components, 1)),
                                             np.ones((1, n_features)))) ** 2,
        'tied': (make_spd_matrix(n_features) + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * rand((n_components, n_features))) ** 2,
        'full': np.array([(make_spd_matrix(n_features)
                           + mincv * np.eye(n_features))
                          for x in range(n_components)]),
    }[covariance_type]

def make_covar_matrix(covariance_type, n_components, n_features):
    mincv = 0.1
    rand = np.random.random
    if covariance_type == 'spherical':
        return (mincv + mincv * rand((n_components,))) ** 2
    elif covariance_type == 'tied':
        return make_spd_matrix(n_features) + mincv * np.eye(n_features)
    elif covariance_type == 'diag':
        return (mincv + mincv * rand((n_components, n_features))) ** 2
    elif covariance_type == 'full':
        return np.array([make_spd_matrix(n_features)
                         + mincv * np.eye(n_features)
                         for x in range(n_components)])

def make_covar_matrix(covariance_type, n_components, n_features):
    mincv = 0.1
    rand = np.random.random
    if covariance_type == 'spherical':
        return (mincv + mincv * rand((n_components,))) ** 2
    elif covariance_type == 'tied':
        return make_spd_matrix(n_features) + mincv * np.eye(n_features)
    elif covariance_type == 'diag':
        return (mincv + mincv * rand((n_components, n_features))) ** 2
    elif covariance_type == 'full':
        return np.array([make_spd_matrix(n_features)
                         + mincv * np.eye(n_features)
                         for x in range(n_components)])

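# Hedged usage sketch for the if/elif make_covar_matrix variant above,
# checking the array shape produced for each covariance type (assumes the
# numpy and make_spd_matrix imports the snippets rely on):
import numpy as np
from sklearn.datasets import make_spd_matrix

n_components, n_features = 3, 4
for covariance_type, expected_shape in [
        ('spherical', (n_components,)),
        ('tied', (n_features, n_features)),
        ('diag', (n_components, n_features)),
        ('full', (n_components, n_features, n_features))]:
    covars = make_covar_matrix(covariance_type, n_components, n_features)
    assert np.asarray(covars).shape == expected_shape
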
def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    prng = check_random_state(prng)
    g = mixture.GMM(n_mix, covariance_type=covariance_type)
    g.means_ = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars_ = {
        "spherical": (mincv + mincv * np.dot(prng.rand(n_mix, 1),
                                             np.ones((1, n_features)))) ** 2,
        "tied": (make_spd_matrix(n_features, random_state=prng)
                 + mincv * np.eye(n_features)),
        "diag": (mincv + mincv * prng.rand(n_mix, n_features)) ** 2,
        "full": np.array([make_spd_matrix(n_features, random_state=prng)
                          + mincv * np.eye(n_features)
                          for x in range(n_mix)]),
    }[covariance_type]
    g.weights_ = hmm.normalize(prng.rand(n_mix))
    return g

def _setUp(self):
    self.n_components = 10
    self.n_features = 4
    self.weights = rng.rand(self.n_components)
    self.weights = self.weights / self.weights.sum()
    self.means = rng.randint(-20, 20, (self.n_components, self.n_features))
    self.threshold = -0.5
    self.I = np.eye(self.n_features)
    self.covars = {
        "spherical": (0.1 + 2 * rng.rand(self.n_components,
                                         self.n_features)) ** 2,
        "tied": (make_spd_matrix(self.n_features, random_state=0)
                 + 5 * self.I),
        "diag": (0.1 + 2 * rng.rand(self.n_components,
                                    self.n_features)) ** 2,
        "full": np.array([make_spd_matrix(self.n_features, random_state=0)
                          + 5 * self.I
                          for x in range(self.n_components)]),
    }

def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    prng = check_random_state(prng)
    g = mixture.GMM(n_mix, covariance_type=covariance_type)
    g.means_ = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars_ = {
        'spherical': (mincv + mincv * np.dot(prng.rand(n_mix, 1),
                                             np.ones((1, n_features)))) ** 2,
        'tied': (make_spd_matrix(n_features, random_state=prng)
                 + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * prng.rand(n_mix, n_features)) ** 2,
        'full': np.array([make_spd_matrix(n_features, random_state=prng)
                          + mincv * np.eye(n_features)
                          for x in range(n_mix)]),
    }[covariance_type]
    g.weights_ = hmm.normalize(prng.rand(n_mix))
    return g

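# create_random_gmm above targets the long-removed sklearn.mixture.GMM API.
# A hedged modern analogue with sklearn.mixture.GaussianMixture (attribute
# names differ, e.g. covariances_ instead of covars_); this sketch only
# builds the object and sets parameters, it does not make it sample-ready:
import numpy as np
from sklearn.datasets import make_spd_matrix
from sklearn.mixture import GaussianMixture
from sklearn.utils import check_random_state

def create_random_gaussian_mixture(n_mix, n_features, random_state=0):
    prng = check_random_state(random_state)
    g = GaussianMixture(n_components=n_mix, covariance_type='full')
    g.means_ = prng.randint(-20, 20, (n_mix, n_features)).astype(float)
    g.covariances_ = np.array([make_spd_matrix(n_features, random_state=prng)
                               + 0.1 * np.eye(n_features)
                               for _ in range(n_mix)])
    weights = prng.rand(n_mix)
    g.weights_ = weights / weights.sum()
    return g
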
def _setUp(self):
    self.n_components = 10
    self.n_features = 4
    self.weights = rng.rand(self.n_components)
    self.weights = self.weights / self.weights.sum()
    self.means = rng.randint(-20, 20, (self.n_components, self.n_features))
    self.threshold = -0.5
    self.I = np.eye(self.n_features)
    self.covars = {
        'spherical': (0.1 + 2 * rng.rand(self.n_components,
                                         self.n_features)) ** 2,
        'tied': (make_spd_matrix(self.n_features, random_state=0)
                 + 5 * self.I),
        'diag': (0.1 + 2 * rng.rand(self.n_components,
                                    self.n_features)) ** 2,
        'full': np.array([make_spd_matrix(self.n_features, random_state=0)
                          + 5 * self.I
                          for x in range(self.n_components)]),
    }

def make_covar_matrix(covariance_type, n_components, n_features,
                      random_state=None):
    mincv = 0.1
    prng = check_random_state(random_state)
    if covariance_type == 'spherical':
        return (mincv + mincv * prng.random_sample((n_components,))) ** 2
    elif covariance_type == 'tied':
        return make_spd_matrix(n_features) + mincv * np.eye(n_features)
    elif covariance_type == 'diag':
        return (mincv
                + mincv * prng.random_sample((n_components, n_features))) ** 2
    elif covariance_type == 'full':
        return np.array([make_spd_matrix(n_features, random_state=prng)
                         + mincv * np.eye(n_features)
                         for x in range(n_components)])

def create_random_gmm(n_mix, n_features, cvtype, prng=prng):
    from sklearn import mixture
    g = mixture.GMM(n_mix, cvtype=cvtype)
    g.means = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars = {
        'spherical': (mincv + mincv * prng.rand(n_mix)) ** 2,
        'tied': (make_spd_matrix(n_features, random_state=prng)
                 + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * prng.rand(n_mix, n_features)) ** 2,
        'full': np.array([make_spd_matrix(n_features, random_state=prng)
                          + mincv * np.eye(n_features)
                          for x in range(n_mix)]),
    }[cvtype]
    g.weights = hmm.normalize(prng.rand(n_mix))
    return g

def _setUp(self):
    self.n_components = 10
    self.n_features = 4
    self.weights = rng.rand(self.n_components)
    self.weights = self.weights / self.weights.sum()
    self.means = rng.randint(-20, 20, (self.n_components, self.n_features))
    self.threshold = -0.5
    self.I = np.eye(self.n_features)
    self.covars = {
        'spherical': (0.1 + 2 * rng.rand(self.n_components,
                                         self.n_features)) ** 2,
        'tied': (make_spd_matrix(self.n_features, random_state=0)
                 + 5 * self.I),
        'diag': (0.1 + 2 * rng.rand(self.n_components,
                                    self.n_features)) ** 2,
        'full': np.array([make_spd_matrix(self.n_features, random_state=0)
                          + 5 * self.I
                          for x in range(self.n_components)]),
    }

def make_covar_matrix(covariance_type, n_components, n_features,
                      random_state=None):
    mincv = 0.1
    prng = check_random_state(random_state)
    if covariance_type == 'spherical':
        return (mincv + mincv * prng.random_sample((n_components,))) ** 2
    elif covariance_type == 'tied':
        return make_spd_matrix(n_features) + mincv * np.eye(n_features)
    elif covariance_type == 'diag':
        return (mincv
                + mincv * prng.random_sample((n_components, n_features))) ** 2
    elif covariance_type == 'full':
        return np.array([make_spd_matrix(n_features, random_state=prng)
                         + mincv * np.eye(n_features)
                         for x in range(n_components)])

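# With an integer seed, the random_state-aware variant above is reproducible
# for the prng-driven types; a minimal check for 'diag' (the 'tied' branch
# still calls make_spd_matrix without a seed, so it is not covered here).
# Assumes the helper above together with its numpy/check_random_state imports:
import numpy as np

a = make_covar_matrix('diag', 3, 4, random_state=0)
b = make_covar_matrix('diag', 3, 4, random_state=0)
assert np.allclose(a, b)
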
def create_random_gmm(n_mix, n_features, cvtype, prng=prng):
    from sklearn import mixture
    g = mixture.GMM(n_mix, cvtype=cvtype)
    g.means = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars = {
        'spherical': (mincv + mincv * prng.rand(n_mix)) ** 2,
        'tied': (make_spd_matrix(n_features, random_state=prng)
                 + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * prng.rand(n_mix, n_features)) ** 2,
        'full': np.array([make_spd_matrix(n_features, random_state=prng)
                          + mincv * np.eye(n_features)
                          for x in range(n_mix)]),
    }[cvtype]
    g.weights = hmm.normalize(prng.rand(n_mix))
    return g

def calculate_covariance(states, feature_list, n_features):
    # Too little data is available to estimate per-state covariance matrices
    # reliably, so return a random SPD matrix (plus an identity ridge) for
    # every state instead.
    return np.array([make_spd_matrix(n_features, random_state=0)
                     + np.eye(n_features)
                     for x in range(len(states))])

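# The estimation path that calculate_covariance falls back from could look
# like the hedged sketch below: np.cov per state, symmetrize, add a diagonal
# ridge until positive definite, and otherwise use the random SPD fallback.
# is_pos_def is an assumed helper, defined inline:
import numpy as np
from sklearn.datasets import make_spd_matrix

def is_pos_def(a):
    # A symmetric matrix is positive definite iff all eigenvalues are > 0.
    return np.all(np.linalg.eigvalsh(a) > 0)

def estimate_state_covariance(features, n_features, ridge=0.2, max_tries=10):
    fallback = make_spd_matrix(n_features, random_state=0) + np.eye(n_features)
    cov = np.cov(np.asarray(features), rowvar=False)
    if np.isnan(cov).any():
        return fallback
    cov = 0.5 * (cov + cov.T)                    # enforce exact symmetry
    for _ in range(max_tries):
        if is_pos_def(cov):
            return cov
        cov = cov + ridge * np.eye(n_features)   # ridge until positive definite
    return fallback
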
class GMMTester():
    do_test_eval = True
    n_components = 10
    n_features = 4
    weights = rng.rand(n_components)
    weights = weights / weights.sum()
    means = rng.randint(-20, 20, (n_components, n_features))
    threshold = -0.5
    I = np.eye(n_features)
    covars = {
        'spherical': (0.1 + 2 * rng.rand(n_components, n_features)) ** 2,
        'tied': make_spd_matrix(n_features, random_state=0) + 5 * I,
        'diag': (0.1 + 2 * rng.rand(n_components, n_features)) ** 2,
        'full': np.array([make_spd_matrix(n_features, random_state=0) + 5 * I
                          for x in range(n_components)]),
    }

    def test_eval(self):
        if not self.do_test_eval:
            return
        # DPGMM does not support setting the means and covariances before
        # fitting. There is no way of fixing this due to the variational
        # parameters being more expressive than covariance matrices.
        g = self.model(n_components=self.n_components,
                       covariance_type=self.covariance_type, random_state=rng)
        # Make sure the means are far apart so responsibilities.argmax()
        # picks the actual component used to generate the observations.
        g.means_ = 20 * self.means
        g.covars_ = self.covars[self.covariance_type]
        g.weights_ = self.weights

        gaussidx = np.repeat(range(self.n_components), 5)
        n_samples = len(gaussidx)
        X = rng.randn(n_samples, self.n_features) + g.means_[gaussidx]

        ll, responsibilities = g.eval(X)

        self.assertEqual(len(ll), n_samples)
        self.assertEqual(responsibilities.shape,
                         (n_samples, self.n_components))
        assert_array_almost_equal(responsibilities.sum(axis=1),
                                  np.ones(n_samples))
        assert_array_equal(responsibilities.argmax(axis=1), gaussidx)

    def test_sample(self, n=100):
        g = self.model(n_components=self.n_components,
                       covariance_type=self.covariance_type, random_state=rng)
        # Make sure the means are far apart so responsibilities.argmax()
        # picks the actual component used to generate the observations.
        g.means_ = 20 * self.means
        g.covars_ = np.maximum(self.covars[self.covariance_type], 0.1)
        g.weights_ = self.weights

        samples = g.sample(n)
        self.assertEqual(samples.shape, (n, self.n_features))

    def test_train(self, params='wmc'):
        g = mixture.GMM(n_components=self.n_components,
                        covariance_type=self.covariance_type)
        g.weights_ = self.weights
        g.means_ = self.means
        g.covars_ = 20 * self.covars[self.covariance_type]

        # Create a training set by sampling from the predefined distribution.
        X = g.sample(n_samples=100)
        g = self.model(n_components=self.n_components,
                       covariance_type=self.covariance_type, random_state=rng,
                       min_covar=1e-1)
        g.fit(X, n_iter=1, init_params=params)

        # Do one training iteration at a time so we can keep track of the log
        # likelihood to make sure that it increases after each iteration.
        trainll = []
        for _ in range(5):
            g.fit(X, n_iter=1, params=params, init_params='')
            trainll.append(self.score(g, X))
        g.fit(X, n_iter=10, params=params, init_params='')  # finish fitting

        # Note that the log likelihood will sometimes decrease by a very
        # small amount after it has more or less converged due to the
        # addition of min_covar to the covariance (to prevent underflow).
        # This is why the threshold is set to -0.5 instead of 0.
        delta_min = np.diff(trainll).min()
        self.assertTrue(
            delta_min > self.threshold,
            "The min nll increase is %f which is lower than the admissible"
            " threshold of %f, for model %s. The likelihoods are %s."
            % (delta_min, self.threshold, self.covariance_type, trainll))

    def test_train_degenerate(self, params='wmc'):
        """Train on degenerate data with 0 in some dimensions."""
        # Create a training set by sampling from the predefined distribution.
        X = rng.randn(100, self.n_features)
        X.T[1:] = 0
        g = self.model(n_components=2, covariance_type=self.covariance_type,
                       random_state=rng, min_covar=1e-3)
        g.fit(X, n_iter=5, init_params=params)
        trainll = g.score(X)
        self.assertTrue(np.sum(np.abs(trainll / 100 / X.shape[1])) < 5)

    def test_train_1d(self, params='wmc'):
        """Train on 1-D data."""
        # Create a training set by sampling from the predefined distribution.
        X = rng.randn(100, 1)
        g = self.model(n_components=2, covariance_type=self.covariance_type,
                       random_state=rng, min_covar=1e-7)
        g.fit(X, n_iter=5, init_params=params)
        trainll = g.score(X)
        if isinstance(g, mixture.DPGMM):
            self.assertTrue(np.sum(np.abs(trainll / 100)) < 5)
        else:
            self.assertTrue(np.sum(np.abs(trainll / 100)) < 2)

    def score(self, g, X):
        return g.score(X).sum()

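# The monotone-likelihood check in test_train has a modern analogue: with
# GaussianMixture, warm_start=True plus max_iter=1 performs one EM step per
# fit call, so per-iteration scores can be tracked. A hedged sketch on
# synthetic data (the -0.5 tolerance mirrors the threshold above):
import numpy as np
from sklearn.mixture import GaussianMixture

rng2 = np.random.RandomState(0)
X = np.vstack([rng2.randn(100, 2) + 5, rng2.randn(100, 2) - 5])
g = GaussianMixture(n_components=2, max_iter=1, warm_start=True,
                    random_state=0)
scores = []
for _ in range(5):
    g.fit(X)                    # one EM iteration per call with warm_start
    scores.append(g.score(X))   # mean per-sample log likelihood
assert np.diff(scores).min() > -0.5
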