def mstep(self, gamma, x, u, weights=None, use_prior=False):
    """Refit the per-state linear parameters by weighted regression.

    For every state ``k`` the targets ``u`` are regressed on the features
    ``self.featurize(x)`` augmented with a trailing column of ones, using
    column ``k`` of ``gamma`` as sample weights. The results are written to
    ``self.K[k]`` (first ``self.dm_feat`` coefficient columns),
    ``self.kff[k]`` (last coefficient column, i.e. the ones-column term)
    and ``self.cov[k]``.

    Args:
        gamma: Sequence with one 2-D array per trajectory; column ``k`` is
            used as the regression weight for state ``k`` (presumably the
            state posteriors from an E-step -- confirm against the caller).
        x: Sequence of per-trajectory inputs passed to ``self.featurize``.
        u: Sequence of per-trajectory regression targets.
        weights: Optional sequence with one 1-D array per trajectory; each
            is broadcast across the columns of the matching ``gamma`` entry.
            ``None`` or an empty sequence leaves ``gamma`` untouched.
        use_prior: When true, ``self.prior`` is forwarded as keyword
            arguments to ``linear_regression``.
    """
    # Explicit None/length test instead of bare truthiness, so an ndarray
    # of weights does not raise "truth value of an array is ambiguous".
    if weights is not None and len(weights) > 0:
        gamma = [_w[:, None] * _gamma for _w, _gamma in zip(weights, gamma)]

    xs, ys, ws = [], [], []
    for _x, _u, _w in zip(x, u, gamma):
        _feat = self.featurize(_x)
        # Trailing ones column lets the offset be estimated as a regular
        # coefficient (hence fit_intercept=False below).
        xs.append(np.hstack((_feat, np.ones((_feat.shape[0], 1)))))
        ys.append(_u)
        ws.append(_w)

    # The stacked design matrix, targets and weights do not depend on the
    # state, so build them once rather than once per state.
    # NOTE(review): assumes linear_regression does not modify its inputs
    # in place -- confirm.
    inputs = np.vstack(xs)
    targets = np.vstack(ys)
    state_weights = np.vstack(ws)
    prior = self.prior if use_prior else {}

    _cov = np.zeros((self.nb_states, self.dm_act, self.dm_act))
    for k in range(self.nb_states):
        coef_, sigma = linear_regression(Xs=inputs, ys=targets,
                                         weights=state_weights[:, k],
                                         fit_intercept=False, **prior)
        self.K[k, ...] = coef_[:, :self.dm_feat]
        self.kff[k, ...] = coef_[:, -1]
        _cov[k, ...] = sigma

    # NOTE: a heuristic that re-seeded states with total weight below one
    # from a randomly chosen well-used state (plus small noise) used to
    # live here; it is currently disabled.
    self.cov = _cov
def initialize(self, x, u, **kwargs):
    """Seed ``K``, ``kff`` and ``cov`` from a hard partition of the data.

    Every time step is assigned to one of ``self.nb_states`` groups, then
    one linear regression (with intercept and ``self.prior``) of ``u`` on
    the featurized ``x`` is fitted per group.

    Args:
        x: Sequence of per-trajectory arrays; each is passed through
            ``self.featurize`` and its ``shape[0]`` is taken as its length.
        u: Sequence of per-trajectory target arrays, row-aligned with ``x``.
        **kwargs: ``localize`` (default ``True``) selects the partition:
            k-means over the stacked ``[features, u]`` rows when true,
            uniformly random labels otherwise.
    """
    features = [self.featurize(traj) for traj in x]
    lengths = [traj.shape[0] for traj in x]

    if kwargs.get('localize', True):
        from sklearn.cluster import KMeans

        clusterer = KMeans(self.nb_states)
        clusterer.fit(np.hstack((np.vstack(features), np.vstack(u))))
        # Cut the flat label vector back into one chunk per trajectory.
        boundaries = np.cumsum(lengths)[:-1]
        labels = np.split(clusterer.labels_, boundaries)
    else:
        labels = []
        for length in lengths:
            labels.append(npr.choice(self.nb_states, size=length))

    covariances = np.zeros((self.nb_states, self.dm_act, self.dm_act))
    for state in range(self.nb_states):
        # Row indices, per trajectory, of the steps assigned to this state.
        members = [np.flatnonzero(z == state) for z in labels]
        inputs = np.vstack([f[idx, :] for idx, f in zip(members, features)])
        targets = np.vstack([act[idx, :] for idx, act in zip(members, u)])

        coef, intercept, sigma = linear_regression(
            inputs, targets, weights=None, fit_intercept=True, **self.prior
        )

        self.K[state, ...] = coef[:, :self.dm_feat]
        self.kff[state, :] = intercept
        covariances[state, ...] = sigma

    self.cov = covariances
def initialize(self, x, u, **kwargs):
    """Seed ``A``, ``B``, ``c`` and ``cov`` from a hard partition of the data.

    Every time step except the last of each trajectory is assigned to one
    of ``self.nb_states`` groups. Per group, the next observation
    ``x[t + 1]`` is regressed (with intercept and ``self.prior``) on the
    concatenation ``[x[t], u[t]]``; the first ``self.dm_obs`` coefficient
    columns go to ``A``, the remaining ones to ``B`` and the intercept
    to ``c``.

    Args:
        x: Sequence of per-trajectory observation arrays.
        u: Sequence of per-trajectory arrays, row-aligned with ``x``.
        **kwargs: ``localize`` (default ``True``) selects the partition:
            k-means over the stacked ``[x, u]`` rows when true, uniformly
            random labels otherwise.
    """
    lengths = [traj.shape[0] for traj in x]

    if kwargs.get('localize', True):
        from sklearn.cluster import KMeans

        clusterer = KMeans(self.nb_states)
        clusterer.fit(np.hstack((np.vstack(x), np.vstack(u))))
        boundaries = np.cumsum(lengths)[:-1]
        # The final step of a trajectory has no successor to regress onto,
        # so its label is discarded.
        labels = [chunk[:-1] for chunk in np.split(clusterer.labels_, boundaries)]
    else:
        labels = []
        for length in lengths:
            labels.append(npr.choice(self.nb_states, size=length - 1))

    covariances = np.zeros((self.nb_states, self.dm_obs, self.dm_obs))
    for state in range(self.nb_states):
        # Row indices, per trajectory, of the steps assigned to this state.
        members = [np.flatnonzero(z == state) for z in labels]
        inputs = np.vstack([
            np.hstack((obs[idx, :], act[idx, :]))
            for idx, obs, act in zip(members, x, u)
        ])
        targets = np.vstack([obs[idx + 1, :] for idx, obs in zip(members, x)])

        coef, intercept, sigma = linear_regression(
            inputs, targets, weights=None, fit_intercept=True, **self.prior
        )

        self.A[state, ...] = coef[:, :self.dm_obs]
        self.B[state, ...] = coef[:, self.dm_obs:]
        self.c[state, :] = intercept
        covariances[state, ...] = sigma

    self.cov = covariances