def GenerateDataset(Size, Nfeat, Npoints, Nneurons):
    X, Y = [], []
    for i in range(Size):
        # generate random ffnn
        # P = FFNN_Parameters(Nfeat, Nneurons, 1, 20)
        x = rnd(Npoints, Nfeat) * 20
        # P[2] = np.abs(P[2])
        # y = np.dot(np.tanh(np.dot(x, P[0]) + P[1]), P[2])
        pr = np.random.randn(x.shape[1], 1)
        pr = pr > 0
        y = np.dot(x, pr) * 0.5
        xy = np.concatenate((x, y), axis=1)
        # W = np.vstack((P[0], [P[1]], np.transpose(P[2])))
        # W = W[np.lexsort(np.fliplr(W).T)]
        # W = np.transpose(W)
        xval = xy.flatten()
        yval = pr.flatten()
        if i == 0:  # allocate output arrays once the flattened size is known
            X = np.zeros((Size, xval.shape[0]))
            Y = np.zeros((Size, yval.shape[0]))
        X[i, :] = xval
        Y[i, :] = yval
    return X, Y
def _process_inputs(self, X, E=None, lengths=None):
    if self.n_features == 1:
        if lengths is None:
            lagged = lagmat(X, maxlag=self.n_lags, trim='forward',
                            original='ex')
        else:
            lagged = np.zeros((len(X), self.n_lags))
            for i, j in iter_from_X_lengths(X, lengths):
                lagged[i:j, :] = lagmat(X[i:j], maxlag=self.n_lags,
                                        trim='forward', original='ex')
        return {'obs': X.reshape(-1, 1),
                'lagged': lagged.reshape(-1, self.n_features, self.n_lags)}
    else:
        lagged = np.zeros((X.shape[0], self.n_features, self.n_lags))
        if lengths is None:
            tem = lagmat(X, maxlag=self.n_lags, trim='forward', original='ex')
            for sample in range(X.shape[0]):
                lagged[sample] = np.reshape(tem[sample],
                                            (self.n_features, self.n_lags), 'F')
        else:
            for i, j in iter_from_X_lengths(X, lengths):
                lagged[i:j, :] = lagmat(X[i:j], maxlag=self.n_lags,
                                        trim='forward', original='ex')
            lagged = lagged.reshape(-1, self.n_features, self.n_lags)
        return {'obs': X, 'lagged': lagged}
def adam_minimax(grad_both, init_params_max, init_params_min, callback=None,
                 num_iters=100, step_size_max=0.001, step_size_min=0.001,
                 b1=0.9, b2=0.999, eps=10**-8):
    """Adam modified to do minimax optimization, for instance to help with
    training generative adversarial networks."""

    x_max, unflatten_max = flatten(init_params_max)
    x_min, unflatten_min = flatten(init_params_min)

    m_max = np.zeros(len(x_max))
    v_max = np.zeros(len(x_max))
    m_min = np.zeros(len(x_min))
    v_min = np.zeros(len(x_min))

    for i in range(num_iters):
        g_max_uf, g_min_uf = grad_both(unflatten_max(x_max),
                                       unflatten_min(x_min), i)
        g_max, _ = flatten(g_max_uf)
        g_min, _ = flatten(g_min_uf)

        if callback:
            callback(unflatten_max(x_max), unflatten_min(x_min), i,
                     unflatten_max(g_max), unflatten_min(g_min))

        # Ascent step on the maximizer's parameters.
        m_max = (1 - b1) * g_max + b1 * m_max        # First moment estimate.
        v_max = (1 - b2) * (g_max**2) + b2 * v_max   # Second moment estimate.
        mhat_max = m_max / (1 - b1**(i + 1))         # Bias correction.
        vhat_max = v_max / (1 - b2**(i + 1))
        x_max = x_max + step_size_max * mhat_max / (np.sqrt(vhat_max) + eps)

        # Descent step on the minimizer's parameters.
        m_min = (1 - b1) * g_min + b1 * m_min        # First moment estimate.
        v_min = (1 - b2) * (g_min**2) + b2 * v_min   # Second moment estimate.
        mhat_min = m_min / (1 - b1**(i + 1))         # Bias correction.
        vhat_min = v_min / (1 - b2**(i + 1))
        x_min = x_min - step_size_min * mhat_min / (np.sqrt(vhat_min) + eps)

    return unflatten_max(x_max), unflatten_min(x_min)
def get_dxopt_delta_p(lin_solver, df_dx, d_dp_df_dx, d_dx_df_dx, A, b, xopt,
                      p, delta_p_direction):
    # f(x, p) should be convex
    x_len = A.shape[1]

    # get tight constraints
    A_tight, b_tight = get_tight_constraints(A, b, xopt)
    num_tight = A_tight.shape[0]

    # get d
    p_dim = len(delta_p_direction.shape)
    delta_p_direction_broadcasted = np.tile(
        delta_p_direction, tuple([x_len] + [1 for i in range(p_dim)]))
    d_top = -np.sum(d_dp_df_dx(p, xopt) * delta_p_direction_broadcasted,
                    axis=tuple(range(1, 1 + p_dim)))
    d_bottom = np.zeros(num_tight)
    d = np.hstack((d_top, d_bottom))

    # get C
    C = np.vstack((np.hstack((d_dx_df_dx(xopt, p), -A_tight.T)),
                   np.hstack((A_tight, np.zeros((num_tight, num_tight))))))

    # get deriv
    deriv = lin_solver(C, d)
    # print('solver error:', np.linalg.norm(np.dot(C, deriv) - d))
    return deriv
def projectParam_vec(param, N, D, G, M, K, lb=1e-6):
    # unpack the input parameter vector
    tp_1 = [0, M, 2*M, 3*M, 4*M, 4*M+M*G, 4*M+M*G+G, 4*M+M*G+2*G,
            4*M+M*G+2*G+G*N*K, 4*M+M*G+2*G+G*(N+D)*K,
            4*M+M*G+2*G+G*(N+2*D)*K, 4*M+M*G+2*G+G*(N+2*D+1)*K,
            4*M+M*G+2*G+G*(N+2*D+2)*K]
    tp_2 = []
    for i in np.arange(len(tp_1) - 1):
        tp_2.append(param[tp_1[i]:tp_1[i+1]])
    [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, eta, mu_w, sigma_w,
     mu_b, sigma_b] = tp_2

    phi = np.reshape(phi, (M, G))
    eta = np.reshape(eta, (G, N, K))

    # apply projections
    w_tau_ab = projectLB(np.concatenate((tau_a1, tau_a2, tau_b1, tau_b2)), lb)
    w_phi = np.zeros((M, G))
    for m in np.arange(M):
        w_phi[m] = projectSimplex_vec(phi[m])
    w_tau_v = projectLB(np.concatenate((tau_v1, tau_v2)), lb)
    w_eta = np.zeros((G, N, K))
    for g in np.arange(G):
        for n in np.arange(N):
            w_eta[g, n] = projectSimplex_vec(eta[g, n])

    w = np.concatenate((w_tau_ab, w_phi.reshape(M*G), w_tau_v,
                        w_eta.reshape(G*N*K), mu_w, projectLB(sigma_w, lb),
                        mu_b, projectLB(sigma_b, lb)))
    return w
def adam(grad, x, batch_id=None, num_batches=None, callback=None,
         num_iters=100, step_size=0.001, b1=0.9, b2=0.999, eps=10**-8):
    """Adam as described in http://arxiv.org/pdf/1412.6980.pdf.
    It's basically RMSprop with momentum and some correction terms."""
    m = np.zeros(len(x))
    v = np.zeros(len(x))
    if batch_id is not None:
        scale_factor = (2**(num_batches - batch_id)) / (2**(num_batches - 1))
    else:
        scale_factor = 1
    for i in range(num_iters):
        g = grad(x, scale_factor)
        if callback:
            callback(x, i, g)
        m = (1 - b1) * g + b1 * m        # First moment estimate.
        v = (1 - b2) * (g**2) + b2 * v   # Second moment estimate.
        mhat = m / (1 - b1**(i + 1))     # Bias correction.
        vhat = v / (1 - b2**(i + 1))
        x -= step_size * mhat / (np.sqrt(vhat) + eps)
    return x
def backward_pass(self, delta):
    if len(delta.shape) == 2:
        delta = delta[:, np.newaxis, :]

    n_samples, n_timesteps, input_shape = delta.shape
    p = self._params

    # Temporal gradient arrays
    grad = {k: np.zeros_like(p[k]) for k in p.keys()}

    dh_next = np.zeros((n_samples, input_shape))
    output = np.zeros((n_samples, n_timesteps, self.input_dim))

    # Backpropagation through time
    for i in reversed(range(n_timesteps)):
        dhi = self.activation_d(self.states[:, i, :]) * (delta[:, i, :] + dh_next)

        grad['W'] += np.dot(self.last_input[:, i, :].T, dhi)
        grad['b'] += delta[:, i, :].sum(axis=0)
        grad['U'] += np.dot(self.states[:, i - 1, :].T, dhi)

        dh_next = np.dot(dhi, p['U'].T)

        d = np.dot(delta[:, i, :], p['U'].T)
        output[:, i, :] = np.dot(d, p['W'].T)

    # Change actual gradient arrays
    for k in grad.keys():
        self._params.update_grad(k, grad[k])
    return output
def c_i(params, n, k, i, S, num_particles):
    samples = sample_theta(params, S)
    first = (h_s(samples, n, k, num_particles)
             * gradient_log_recognition(params, samples, i))
    second = gradient_log_recognition(params, samples, i)
    # Optimal control-variate coefficient: Cov(first, second) / Var(second).
    return np.cov(first, second)[0][1] / np.cov(first, second)[1][1]
def test_fast_conv_grad():
    skip = 1
    block_size = (11, 11)
    depth = 1
    img = np.random.randn(51, 51, depth)
    filt = np.dstack([cv.gauss_filt_2D(shape=block_size, sigma=2)
                      for k in range(depth)])
    filt = cv.gauss_filt_2D(shape=block_size, sigma=2)

    def loss_fun(filt):
        out = fc.convolve(filt, img)
        return np.sum(np.sin(out) + out**2)

    loss_fun(filt)
    loss_grad = grad(loss_fun)

    def loss_fun_slow(filt):
        out = auto_convolve(img.squeeze(), filt, mode='valid')
        return np.sum(np.sin(out) + out**2)

    loss_fun_slow(filt)
    loss_grad_slow = grad(loss_fun_slow)

    # compare gradient timing
    loss_grad_slow(filt)
    loss_grad(filt)

    # check numerical gradients
    num_grad = np.zeros(filt.shape)
    for i in range(filt.shape[0]):
        for j in range(filt.shape[1]):
            de = np.zeros(filt.shape)
            de[i, j] = 1e-4
            num_grad[i, j] = (loss_fun(filt + de) - loss_fun(filt - de)) / (2 * de[i, j])

    assert np.allclose(loss_grad(filt), num_grad), "convolution gradient failed!"
def simulate(vx, vy, num_time_steps, occlusion, ax=None, render=False):
    occlusion = sigmoid(occlusion)

    # Disallow occlusion outside a certain area.
    mask = np.zeros((rows, cols))
    mask[10:30, 10:30] = 1.0
    occlusion = occlusion * mask

    # Initialize smoke bands.
    red_smoke = np.zeros((rows, cols))
    red_smoke[rows // 4:rows // 2] = 1
    blue_smoke = np.zeros((rows, cols))
    blue_smoke[rows // 2:3 * rows // 4] = 1

    print("Running simulation...")
    vx, vy = project(vx, vy, occlusion)
    for t in range(num_time_steps):
        plot_matrix(ax, red_smoke, occlusion, blue_smoke, t, render)
        vx_updated = advect(vx, vx, vy)
        vy_updated = advect(vy, vx, vy)
        vx, vy = project(vx_updated, vy_updated, occlusion)
        red_smoke = advect(red_smoke, vx, vy)
        red_smoke = occlude(red_smoke, occlusion)
        blue_smoke = advect(blue_smoke, vx, vy)
        blue_smoke = occlude(blue_smoke, occlusion)
    plot_matrix(ax, red_smoke, occlusion, blue_smoke, num_time_steps, render)
    return vx, vy
def dlnpdf(q, B):
    # Central-difference approximation to the gradient of ln_post at q.
    de = np.zeros(q.shape)
    grad_vec = np.zeros(q.shape)
    for i in range(len(q)):
        de[i] = 1e-6
        grad_vec[i] = (ln_post(q + de, B) - ln_post(q - de, B)) / 2e-6
        de[i] = 0.0
    return grad_vec
def sample(self, n_samples=2000, observed_states=None, random_state=None): """Generate random samples from the self. Parameters ---------- n : int Number of samples to generate. observed_states : array If provided, states are not sampled. random_state: RandomState or an int seed A random number generator instance. If None is given, the object's random_state is used Returns ------- samples : array_like, length (``n_samples``) List of samples states : array_like, shape (``n_samples``) List of hidden states (accounting for tied states by giving them the same index) """ if random_state is None: random_state = self.random_state random_state = check_random_state(random_state) samples = np.zeros(n_samples) states = np.zeros(n_samples) if observed_states is None: startprob_pdf = np.exp(np.copy(self._log_startprob)) startdist = stats.rv_discrete(name='custm', values=(np.arange(startprob_pdf.shape[0]), startprob_pdf), seed=random_state) states[0] = startdist.rvs(size=1)[0] transmat_pdf = np.exp(np.copy(self._log_transmat)) transmat_cdf = np.cumsum(transmat_pdf, 1) nrand = random_state.rand(n_samples) for idx in range(1,n_samples): newstate = (transmat_cdf[states[idx-1]] > nrand[idx-1]).argmax() states[idx] = newstate else: states = observed_states mu = np.copy(self._mu_) precision = np.copy(self._precision_) for idx in range(n_samples): mean_ = self._mu_[states[idx]] var_ = np.sqrt(1/precision[states[idx]]) samples[idx] = norm.rvs(loc=mean_, scale=var_, size=1, random_state=random_state) states = self._process_sequence(states) return samples, states
def PhotometricError(iref, inew, R, T, points, D): # points is a tuple ([y], [x]); convert to homogeneous siz = iref.shape npoints = len(points[0]) f = siz[1] # focal length, FIXME Xref = np.vstack(((points[1] - siz[1]*0.5) / f, # x (siz[0]*0.5 - points[0]) / f, # y (left->right hand) np.ones(npoints))) # z = 1 # this is confusingly written -- i am broadcasting the translation T to # every column, but numpy broadcasting only works if it's rows, hence all # the transposes # print D * Xref Xnew = (np.dot(so3.exp(R), (D * Xref)).T + T).T # print Xnew # right -> left hand projection proj = Xnew[0:2] / Xnew[2] p = (-proj[1]*f + siz[0]*0.5, proj[0]*f + siz[1]*0.5) margin = 10 # int(siz[0] / 5) inwindow_mask = ((p[0] >= margin) & (p[0] < siz[0]-margin-1) & (p[1] >= margin) & (p[1] < siz[1]-margin-1)) npts_inw = sum(inwindow_mask) if npts_inw < 10: return 1e6, np.zeros(6 + npoints) # todo: filter points which are now out of the window oldpointidxs = (points[0][inwindow_mask], points[1][inwindow_mask]) newpointidxs = (p[0][inwindow_mask], p[1][inwindow_mask]) origpointidxs = np.nonzero(inwindow_mask)[0] E = InterpolatedValues(inew, newpointidxs) - iref[oldpointidxs] # dE/dk -> # d/dk r_p^2 = d/dk (Inew(w(r, T, D, p)) - Iref(p))^2 # = -2r_p dInew/dp dp/dw dw/dX dX/dk # = -2r_p * g(w(r, T, D, p)) * dw(r, T, D, p) # intensity gradients for each point Ig = InterpolatedGradients(inew, newpointidxs) # TODO: use tensors for this # gradients for R, T, and D gradient = np.zeros(6 + npoints) for i in range(npts_inw): # print 'newidx (y,x) = ', newpointidxs[0][i], newpointidxs[1][i] # Jacobian of w oi = origpointidxs[i] Jw = dw(Xref[0][oi], Xref[1][oi], D[oi], R, T) # scale back up into pixel space, right->left hand coords to get # Jacobian of p Jp = f * np.vstack((-Jw[1], Jw[0])) # print origpointidxs[i], 'Xref', Xref[:, i], 'Ig', Ig[:, i], \ # 'dwdRz', Jw[:, 2], 'dpdRz', Jp[:, 2] # full Jacobian = 2*E + Ig * Jp J = np.sign(E[i]) * np.dot(Ig[:, i], Jp) # print '2 E[i]', 2*E[i], 'Ig*Jp', np.dot(Ig[:, i], Jp) gradient[:6] += J[:6] # print J[:6] gradient[6+origpointidxs[i]] += J[6] print R, T, np.sum(np.abs(E)), npts_inw # return ((0.2*(npoints - npts_inw) + np.dot(E, E)), gradient) return np.sum(np.abs(E)) / (npts_inw), gradient / (npts_inw)
def c_i(params, i, S, num_particles):
    if S == 1:
        return 0
    samples = sample_theta(params, S)
    first = (h_s(samples, num_particles)
             * gradient_log_variational(params, samples, i))
    second = gradient_log_variational(params, samples, i)
    # Optimal control-variate coefficient: Cov(first, second) / Var(second).
    return np.cov(first, second)[0][1] / np.cov(first, second)[1][1]
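# A minimal self-contained sketch of the control-variate coefficient computed
# in c_i above: c* = Cov(f, h) / Var(h), with h a control variate of known
# mean (here E[h] = 0). All names below are illustrative, not from the source.
import numpy as np

rng = np.random.default_rng(0)
theta = rng.normal(size=10000)
f = theta**2 + theta                 # quantity whose mean we want (E[f] = 1)
h = theta                            # control variate with known mean 0
c = np.cov(f, h)[0][1] / np.cov(f, h)[1][1]
print(f.mean(), (f - c * h).mean())  # the corrected estimate has lower variance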
def unary_nd(f, x, eps=EPS):
    vs = vspace(x)
    nd_grad = np.zeros(vs.size)
    x_flat = vs.flatten(x)
    for d in range(vs.size):
        dx = np.zeros(vs.size)
        dx[d] = eps / 2
        nd_grad[d] = (f(vs.unflatten(x_flat + dx))
                      - f(vs.unflatten(x_flat - dx))) / eps
    return vs.unflatten(nd_grad, True)
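# The same central-difference idea as unary_nd, written as a standalone sketch
# for a plain flat numpy vector (no autograd vspace machinery); purely
# illustrative.
import numpy as np

def numeric_grad(f, x, eps=1e-6):
    g = np.zeros_like(x)
    for d in range(x.size):
        dx = np.zeros_like(x)
        dx[d] = eps / 2
        g[d] = (f(x + dx) - f(x - dx)) / eps
    return g

print(numeric_grad(lambda v: np.sum(v**2), np.array([1.0, 2.0, 3.0])))
# approximately [2. 4. 6.]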
def plot_param_trace(params_trace, ax):
    '''Plot 2d trajectory of parameters on top of axis;
    params_trace is a list of weight vectors.'''
    n_steps = len(params_trace)
    xs = np.zeros(n_steps)
    ys = np.zeros(n_steps)
    for step in range(n_steps):  # start at 0 so the first point is not left at the origin
        xs[step] = params_trace[step][0]
        ys[step] = params_trace[step][1]
    ax.plot(xs, ys, 'o-')
def get_dxopt_dp(lin_solver, df_dx, d_dp_df_dx, d_dx_df_dx, A, b, xopt, p):
    ans = np.zeros(xopt.shape + p.shape)
    for index in np.ndindex(*p.shape):
        delta_p_direction = np.zeros(p.shape)
        delta_p_direction[index] = 1.
        temp = get_dxopt_delta_p(lin_solver, df_dx, d_dp_df_dx, d_dx_df_dx,
                                 A, b, xopt, p, delta_p_direction)
        ans[(slice(None),) + index] = temp[:len(xopt)]
    return ans
def abc_log_likelihood(samples, num_particles):
    N = num_particles
    S = len(samples)
    log_kernels = np.zeros(N)
    ll = np.zeros(S)
    for s in range(S):
        theta = samples[s]
        x, std = simulator(theta, N)
        log_kernels = log_abc_kernel(x, std)
        ll[s] = misc.logsumexp(log_kernels)
        ll[s] = np.log(1. / N) + ll[s]
    return ll
def synthesize_trajectory(self, x0, u_array=None, n_itrs=50, tol=1e-6,
                          verbose=True):
    if self.ilqr_ is None:
        print('No iLQR solver has been prepared.')
        return None

    # initialization doesn't matter as global optimality can be guaranteed?
    if u_array is None:
        u_init = [np.zeros(self.n_dims_) for i in range(self.T_ - 1)]
    else:
        u_init = u_array
    x_init = np.concatenate([x0, np.zeros(self.n_dims_)])

    res = self.ilqr_.ilqr_iterate(x_init, u_init, n_itrs=n_itrs, tol=tol,
                                  verbose=verbose)
    return res['x_array_opt'][:, 0:self.n_dims_]
def _init_params(self, data, lengths=None, params='stmp'): X = data['obs'] if 's' in params: self.startprob_.fill(1.0 / self.n_components) if 't' in params or 'm' in params or 'p' in params: kmmod = cluster.KMeans(n_clusters=self.n_unique, random_state=self.random_state).fit(X) kmeans = kmmod.cluster_centers_ if 't' in params: # TODO: estimate transitions from data (!) / consider n_tied=1 if self.n_tied == 0: transmat = np.ones([self.n_components, self.n_components]) np.fill_diagonal(transmat, 10.0) self.transmat_ = transmat # .90 for self-transition else: transmat = np.zeros((self.n_components, self.n_components)) transmat[range(self.n_components), range(self.n_components)] = 100.0 # diagonal transmat[range(self.n_components-1), range(1, self.n_components)] = 1.0 # diagonal + 1 transmat[[r * (self.n_chain) - 1 for r in range(1, self.n_unique+1) for c in range(self.n_unique-1)], [c * (self.n_chain) for r in range(self.n_unique) for c in range(self.n_unique) if c != r]] = 1.0 self.transmat_ = np.copy(transmat) if 'm' in params: mu_init = np.zeros((self.n_unique, self.n_features)) for u in range(self.n_unique): for f in range(self.n_features): mu_init[u][f] = kmeans[u, f] self.mu_ = np.copy(mu_init) if 'p' in params: precision_init = np.zeros((self.n_unique, self.n_features, self.n_features)) for u in range(self.n_unique): if self.n_features == 1: precision_init[u] = np.linalg.inv(np.cov(X[kmmod.labels_ == u], bias = 1)) else: precision_init[u] = np.linalg.inv(np.cov(np.transpose(X[kmmod.labels_ == u]))) self.precision_ = np.copy(precision_init)
def get_dL_dp_thru_xopt(lin_solver, df_dx, d_dp_df_dx, d_dx_df_dx, dL_dxopt, A, b, xopt, p, L_args=None, f_args=None): # assumes L(x_opt), x_opt = argmin_x f(x,p) subject to Ax<=b # L_args is for arguments to L besides x_opt # first, get dL/dws to calculate the gradient at ws1 if not L_args is None: pass #print 'L_args len:', len(L_args) else: print 'NONE' if L_args is None: dL_dxopt_anal_val1 = dL_dxopt(xopt) # else: # pdb.set_trace() dL_dxopt_anal_val1 = dL_dxopt(xopt, L_args) # get tight constraints A_tight, b_tight = get_tight_constraints(A, b, xopt) num_tight = A_tight.shape[0] # make C matrix # pdb.set_trace() if f_args is None: C_corner = d_dx_df_dx(xopt, p) else: C_corner = d_dx_df_dx(xopt, p, f_args) C = np.vstack((np.hstack((C_corner,-A_tight.T)), np.hstack((A_tight,np.zeros((num_tight,num_tight)))))) # print 'C', C # print 'C rank', np.linalg.matrix_rank(C), C.shape # print 'C corner rank', np.linalg.matrix_rank(C_corner), C_corner.shape # make d vector d = np.hstack((dL_dxopt_anal_val1, np.zeros(num_tight))) # solve Cv=d for x v = lin_solver(C, d) # print 'v', v #print C #print d print 'solver error:', np.linalg.norm(np.dot(C,v) - d) # make D if f_args is None: d_dp_df_dx_anal_val1 = d_dp_df_dx(p, xopt) else: d_dp_df_dx_anal_val1 = d_dp_df_dx(p, xopt, f_args) D = np.vstack((-d_dp_df_dx_anal_val1, np.zeros((num_tight,)+p.shape))) # print 'D', D[0:10] return np.sum(D.T * v[tuple([np.newaxis for i in xrange(len(p.shape))])+(slice(None),)], axis=-1).T
def associative_recall(seq_len, vec_size, item_size):
    """
    Implements the associative recall task - section 4.3 from the paper.
    We show seq_len items, each of which consists of item_size vec_size-bit
    binary vectors. Each item is preceded by a start bit. After all the
    items are shown, a fetch bit is shown. Then a randomly chosen item
    already seen is shown again. Then a final fetch bit. After the final
    fetch bit has been seen, the task is to reproduce the item that was
    seen after the item between fetch bits.
    """
    input_size = vec_size + 2
    output_size = vec_size
    length = (seq_len + 1) * (item_size + 1) + 1 + item_size

    inputs = np.zeros((length, input_size), dtype=np.float32)
    outputs = np.zeros((length, output_size), dtype=np.float32)

    start_bit = np.zeros((1, input_size))
    start_bit[0, -2] = 1
    fetch_bit = np.zeros((1, input_size))
    fetch_bit[0, -1] = 1

    # generate seq_len random items
    items = []
    for i in range(seq_len):
        items.append(np.random.randint(2, size=(item_size, vec_size)))
        a = i * (item_size + 1)
        b = a + item_size
        inputs[a] = start_bit
        inputs[a + 1:b + 1, :-2] = items[i]

    # pick an item at random that isn't the last item to use as the query
    idx = np.random.randint(low=0, high=len(items) - 1)
    fetch_item = items[idx]
    inputs[b + 1] = fetch_bit
    inputs[b + 2:b + 2 + item_size, :-2] = fetch_item
    inputs[b + 2 + item_size] = fetch_bit

    # the target is the item that followed the queried item
    outputs[-items[idx + 1].shape[0]:inputs.shape[0]] = items[idx + 1]

    return inputs, outputs, seq_len
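# Usage sketch for associative_recall (argument values are illustrative):
# with seq_len=4, vec_size=6, item_size=3 the episode length is
# (4+1)*(3+1) + 1 + 3 = 24 and each input vector has 6+2 channels.
inputs, outputs, n_items = associative_recall(seq_len=4, vec_size=6, item_size=3)
print(inputs.shape)   # (24, 8)
print(outputs.shape)  # (24, 6); only the last item_size rows are nonzero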
def pack_dense(A, b, *args):
    '''Used for packing Gaussian natural parameters and statistics into a
    dense ndarray so that we can use tensordot for all the linear
    contraction ops.'''
    # we don't use a symmetric embedding because factors of 1/2 on h are a pain
    leading_dim, N = b.shape[:-1], b.shape[-1]
    z1 = np.zeros(leading_dim + (N, 1))
    z2 = np.zeros(leading_dim + (1, 1))
    c, d = args if args else (z2, z2)

    A = A[..., None] * np.eye(N)[None, ...] if A.ndim == b.ndim else A
    b = b[..., None]
    c = np.reshape(c, leading_dim + (1, 1))
    d = np.reshape(d, leading_dim + (1, 1))

    return vs((hs((A,     b,  z1)),
               hs((T(z1), c,  z2)),
               hs((T(z1), z2, d))))
def adam(grad, x, callback=None, num_iters=100, step_size=0.001,
         b1=0.1, b2=0.0001, eps=10**-8):
    """Adam as described in http://arxiv.org/pdf/1412.6980.pdf.
    It's basically RMSprop with momentum and some correction terms.
    Note: in this variant b1 and b2 weight the *current* gradient, i.e. they
    play the role of (1 - beta1) and (1 - beta2) in the paper."""
    m = np.zeros(len(x))
    v = np.zeros(len(x))
    for i in range(num_iters):
        g = grad(x, i)
        if callback:
            callback(x, i, g)
        m = b1 * g + (1 - b1) * m            # First moment estimate.
        v = b2 * (g**2) + (1 - b2) * v       # Second moment estimate.
        mhat = m / (1 - (1 - b1)**(i + 1))   # Bias correction.
        vhat = v / (1 - (1 - b2)**(i + 1))
        x -= step_size * mhat / (np.sqrt(vhat) + eps)
    return x
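# Usage sketch for the adam routine above: minimize a simple quadratic.
# The gradient callable must accept (x, iteration); the objective and step
# size below are illustrative.
import autograd.numpy as np
from autograd import grad

objective = lambda x: np.sum((x - 3.0)**2)
g = grad(objective)
x_opt = adam(lambda x, i: g(x), np.zeros(5), num_iters=500, step_size=0.1)
print(x_opt)  # approaches [3. 3. 3. 3. 3.]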
def test_linear_system(self):
    """ Tests taking the derivative across a linear system solve """

    def linsolve(params):
        A, B = params
        return np.linalg.solve(A, B)

    B = np.array([1.0, 3.0])
    A = np.array([[5.0, 2.0], [1.0, 3.0]])

    df = jacobian(linsolve)
    diff = df([A, B])

    Ainv = np.linalg.inv(A)
    x = np.linalg.solve(A, B)

    # df_dB
    assert np.linalg.norm(diff[0][1] - Ainv[0]) < 1e-10
    assert np.linalg.norm(diff[1][1] - Ainv[1]) < 1e-10

    # df_dA
    dr_da = np.zeros((2, 4))
    dr_da[0, 0:2] = x
    dr_da[1, 2:] = x
    df_da = np.linalg.solve(A, -dr_da)
    assert np.linalg.norm(df_da[0] - diff[0][0].flatten()) < 1e-10
    assert np.linalg.norm(df_da[1] - diff[1][0].flatten()) < 1e-10
def vjp_all(g):
    vjp_y = g[-1, :]
    vjp_t0 = 0
    time_vjp_list = []
    vjp_args = np.zeros(np.size(flat_args))

    for i in range(T - 1, 0, -1):
        # Compute effect of moving measurement time.
        vjp_cur_t = np.dot(func(yt[i, :], t[i], *func_args), g[i, :])
        time_vjp_list.append(vjp_cur_t)
        vjp_t0 = vjp_t0 - vjp_cur_t

        # Run augmented system backwards to the previous observation.
        aug_y0 = np.hstack((yt[i, :], vjp_y, vjp_t0, vjp_args))
        aug_ans = odeint(augmented_dynamics, aug_y0,
                         np.array([t[i], t[i - 1]]), tuple((flat_args,)),
                         **kwargs)
        _, vjp_y, vjp_t0, vjp_args = unpack(aug_ans[1])

        # Add gradient from current output.
        vjp_y = vjp_y + g[i - 1, :]

    time_vjp_list.append(vjp_t0)
    vjp_times = np.hstack(time_vjp_list)[::-1]

    return None, vjp_y, vjp_times, unflatten(vjp_args)
def gen_prior(K_chol, sig2_omega, sig2_mu):
    th = np.zeros(parser.N)
    N = parser.idxs_and_shapes['mus'][1][0]
    parser.set(th, 'betas', K_chol.dot(npr.randn(len(lam0), K)).T)
    parser.set(th, 'omegas', np.sqrt(sig2_omega) * npr.randn(N, K))
    parser.set(th, 'mus', np.sqrt(sig2_mu) * npr.randn(N))
    return th
def multivariate_t_rvs(self, m, S, random_state=None):
    '''generate random variables of multivariate t distribution

    Parameters
    ----------
    m : array_like
        mean of random variable, length determines dimension of random
        variable
    S : array_like
        square array of covariance matrix
    random_state : RandomState
        random number generator (degrees of freedom and the number of
        observations are taken from the instance / fixed to 1)

    Returns
    -------
    rvs : ndarray, (n, len(m))
        each row is an independent draw of a multivariate t distributed
        random variable
    '''
    m = np.asarray(m)
    d = self.n_features
    df = self.degree_freedom
    n = 1
    if df == np.inf:
        x = 1.
    else:
        x = random_state.chisquare(df, n) / df
    z = random_state.multivariate_normal(np.zeros(d), S, (n,))
    return m + z / np.sqrt(x)[:, None]
def get_marginal(self, u, V, R, x_test):
    '''current metric to test convergence -- log-space predictive marginal
    likelihood'''
    I = self.sigx * np.identity(self.dimx)
    mu = np.zeros(self.dimx,)
    n_samples = 200
    ll = 0
    test_size = x_test.shape[0]
    for i in range(test_size):
        x = x_test[i]
        mc = 0
        for j in range(n_samples):
            w = self.sample_w(u, V)
            var = np.dot(w, np.transpose(w))
            var = np.add(var, I)
            px = gaussian.Gaussian_full(mu, var)
            px = px.eval(x)  # eval_log_properly(x)
            mc = mc + px
        mc = mc / float(n_samples)
        mc = np.log(mc)
        ll += mc
    return ll / float(test_size)
def read_cifar_set(path):
    with open(path, 'rb') as f:
        d = cPickle.load(f)
    vectorized_labels = np.zeros((len(d['labels']), 10))
    for no, label in enumerate(d['labels']):
        vectorized_labels[no][label] = 1.
    return d['data'], vectorized_labels
def outputs(weights, input_set, fence_set, output_set=None, return_pred_set=False): update_x_weights = parser.get(weights, 'update_x_weights') update_h_weights = parser.get(weights, 'update_h_weights') reset_x_weights = parser.get(weights, 'reset_x_weights') reset_h_weights = parser.get(weights, 'reset_h_weights') thidden_x_weights = parser.get(weights, 'thidden_x_weights') thidden_h_weights = parser.get(weights, 'thidden_h_weights') output_h_weights = parser.get(weights, 'output_h_weights') data_count = len(fence_set) - 1 feat_count = input_set.shape[0] ll = 0.0 n_i_track = -1 fence_base = fence_set[0] interval = fence_set[1] - fence_set[0] pred_set = None if return_pred_set: pred_set = np.zeros( (output_count, int(input_set.shape[1] / interval))) print('Prediction set sized ', pred_set.shape) # loop through sequences and time steps for data_iter in range(data_count): # print('Executing iteration %d'%data_iter) hiddens = copy(parser.get(weights, 'init_hiddens')) fence_post_1 = fence_set[data_iter] - fence_base fence_post_2 = fence_set[data_iter + 1] - fence_base time_count = fence_post_2 - fence_post_1 curr_input = input_set[:, fence_post_1:fence_post_2] for time_iter in range(time_count): hiddens = update( np.expand_dims(np.hstack((curr_input[:, time_iter], 1)), axis=0), hiddens, update_x_weights, update_h_weights, reset_x_weights, reset_h_weights, thidden_x_weights, thidden_h_weights) n_i_track += 1 if output_set is not None: # subtract a small number so -1 out_proba = softmax_sigmoid(np.dot(hiddens, output_h_weights)) out_lproba = safe_log(out_proba) ll += np.sum(output_set[:, n_i_track] * out_lproba) else: out_proba = softmax_sigmoid(np.dot(hiddens, output_h_weights)) out_lproba = safe_log(out_proba) if return_pred_set: agm = np.argmax(out_lproba[0]) pred_set[agm, n_i_track] = int(1) return ll, pred_set
def __init__(self, param_set, buffer_size=200000, buffer_type='Qnetwork', mem_priority=True, general=False): """Initialize the storage containers and parameters relevant for experience replay. Arguments: param_set -- dictionary of parameters which must contain: PER_alpha -- hyperparameter governing how much prioritization is used PER_beta_zero -- importance sampling parameter initial value bnn_start -- number of timesteps before sample will be drawn; i.e the minimum partition size (necessary if buffer_type=='BNN') dqn_start -- same as dqn_start (necessary if buffer_type=='Qnetwork') episode_count -- number of episodes instance_count -- number of instances max_task_examples -- maximum number of timesteps per episode ddqn_batch_size -- minibatch size for DQN updates (necessary if buffer_type=='Qnetwork') bnn_batch_size -- minibatch size for BNN updates (necessary if buffer_type=='BNN') num_strata_samples -- number of samples to be drawn from each strata in the prioritized replay buffer general_num_partitions -- number of partitions for general experience buffer instance_num_partitions -- number of partitions for instance experience buffer Keyword arguments: buffer_size -- maximum capacity of the experience buffer (default: 200000) buffer_type -- string indicating whether experience replay is for training a DQN or a BNN (either 'Qnetwork' or 'BNN'; default: 'Qnetwork') mem_priority -- boolean indicating whether the experience replay should be prioritized (default: True) general -- boolean indicating if the experience replay is for collecting experiences over multiple instances or a single (default: False) """ # Extract/Set relevant parameters self.mem_priority = mem_priority self.alpha = param_set['PER_alpha'] self.beta_zero = param_set['PER_beta_zero'] self.capacity = buffer_size self.is_full = False self.index = 0 # Index number in priority queue where next transition should be inserted self.size = 0 # Current size of experience replay buffer if buffer_type == 'Qnetwork': self.num_init_train = param_set['dqn_start'] self.tot_steps = param_set['episode_count'] * param_set[ 'max_task_examples'] self.batch_size = param_set['ddqn_batch_size'] elif buffer_type == 'BNN': self.num_init_train = param_set['bnn_start'] self.tot_steps = ( param_set['episode_count'] * param_set['instance_count']) * param_set['max_task_examples'] self.batch_size = param_set['bnn_batch_size'] self.beta_grad = (1 - self.beta_zero) / (self.tot_steps - self.num_init_train) self.num_strata_samples = param_set['num_strata_samples'] # Note: at least one partition must be completely filled in order for the sampling procedure to work self.num_partitions = self.capacity / (1.0 * self.num_init_train) # Initialize experience buffer self.exp_buffer = [] # Initialize rank priority distributions and stratified sampling cutoffs if needed if self.mem_priority: # Initialize Priority Queue (will be implemented as a binary heap) self.pq = PriorityQueue(capacity=buffer_size) self.distributions = {} partition_num = 1 partition_division = self.capacity / self.num_partitions for n in np.arange(partition_division, self.capacity + 0.1, partition_division): # Set up power-law PDF and CDF distribution = {} distribution['pdf'] = np.power(np.linspace(1, n, n), -1 * self.alpha) pdf_sum = np.sum(distribution['pdf']) distribution['pdf'] = distribution['pdf'] / float( pdf_sum) # Normalise PDF cdf = np.cumsum(distribution['pdf']) # Set up strata for stratified sampling (transitions will have varying TD-error magnitudes) distribution['strata_ends'] = 
np.zeros(self.batch_size + 1) distribution['strata_ends'][0] = 0 # First index is 0 (+1) distribution['strata_ends'][ self.batch_size] = n # Last index is n # Use linear search to find strata indices stratum = 1.0 / self.batch_size index = 0 for s in range(1, self.batch_size): if cdf[index] >= stratum: index += 1 while cdf[index] < stratum: index = index + 1 distribution['strata_ends'][s] = index stratum = stratum + 1.0 / self.batch_size # Set condition for next stratum # Store distribution self.distributions[partition_num] = distribution partition_num = partition_num + 1
theta_dict["c_hat"] = .2 theta_dict["sigma_epsilon"] = 1 theta_dict["gamma"] = .1 imp.reload(policies) pecmy = policies.policies(data, params, b, rcv_path=rcvPath) b_k1 = np.array([.7, 0, .5, .2, 1, .3]) out = pecmy.est_loop(b_k1, theta_dict) # Loss seemed monotonically decreasing in c... 1.76 - 1.39, why? # probably has to do with a lot of countries sitting at upper bound of preference parameter...loosening constraints makes policies more realistic # out_dict = pecmy.est_loop(b_init, theta_dict_init, est_c=True) rcv = np.zeros( (pecmy.N, pecmy.N )) # empty regime change value matrix (row's value for invading column) for i in range(pecmy.N): b_nearest = hp.find_nearest(b_init, b[i]) rcv[i, ] = pecmy.rcv[b_nearest][i, ] pecmy.ecmy.tau pecmy.ecmy.Y # start_time = time.time() # out_dict = pecmy.est_loop(b_init, theta_dict_init) # print("--- %s seconds ---" % (time.time() - start_time)) if not os.path.exists(resultsPath + "estimates_sv.csv"): theta_dict_init = dict()
def default_gradient(x):
    return np.zeros(input_dim)
def main(): # Create initial condition. u = np.ones(nx) # Apply the step condition. u[int(.5 / dx):int(1 / dx + 1)] = 2 # Print initial condition. un = np.ones(nx) # Prepare the residue vector. rhs = np.zeros(nx) # Iterate the solution and monitor the eigenvalues. for n in range(nt): # Dump iteration count. print(" +++ Time: " + str(n) + " +++") # Copy the solution of the explicit time marching scheme. un = u.copy() # Separate the residue. rhs = frhs_vec(un, nu, dx) # March the residue. u = un + dt * rhs # Computes the derivative of the residues with respect to the solution vector. eps = 0.0001 drhs_du = np.zeros( (nx - 1, nx - 1)) # In order to take the eigenvalues, this shall be a matrix. # This loop computes the jacobian matrix according to http://www.netlib.org/math/docpdf/ch08-04.pdf for i in range(1, nx - 1): for j in range(1, nx - 1): drhs_du[i, j] = (frhs(un[i - 1] + eps, un[i] + eps, un[i + 1] + eps, dx, nu) - frhs(un[j - 1], un[j], un[j + 1], dx, nu)) / eps # Build the Hirsch matrix (chap 8). s_m = np.zeros((nx - 1, nx - 1)) # Fill the diagonals s_m = (nu / dx**2.0) * create_diagonal(1.0, -2.0, 1.0, nx - 1) # Solve the eigenvalues. w1, v1 = np.linalg.eig(drhs_du) w2, v2 = np.linalg.eig(s_m) # Prepare the plots. real1 = np.zeros(nx) imag1 = np.zeros(nx) real1 = -np.sort(-w1.real[:]) imag1 = -np.sort(-w1.imag[:]) real2 = np.zeros(nx) imag2 = np.zeros(nx) real2 = -np.sort(-w2.real[:]) imag2 = -np.sort(-w2.imag[:]) print("\n") print("Minimun eigenvalues (Frechet): Real(eig): ", min(real1), " Imaginary: Imag(eig): ", min(imag1)) print("Maximun eigenvalues (Frechet): Real(eig): ", max(real1), " Imaginary: Imag(eig): ", max(imag1)) print("Minimun eigenvalues (Hirsch ): Real(eig): ", min(real2), " Imaginary: Imag(eig): ", min(imag2)) print("Maximun eigenvalues (Hirsch ): Real(eig): ", max(real2), " Imaginary: Imag(eig): ", max(imag2)) # Print both matrices. print(np.matrix(s_m)) print("------------------------------------------------------------") print(np.matrix(drhs_du)) # plot the eigenvalues. plt.figure(3) fig, ax = plt.subplots(3, figsize=(11, 11)) ax[0].plot(imag1, real1, 'ro') ax[0].set(ylabel='Real(Eig)', xlabel='Imag(Eig)') ax[0].set_xlim(-0.06, 0.06) # ax[0].set_ylim(-70.0,10.0) ax[1].plot(imag2, real2, 'ro') ax[1].set(ylabel='Real(Eig)', xlabel='Imag(Eig)') ax[1].set_xlim(-0.06, 0.06) # ax[1].set_ylim(-70.0,10.0) ax[2].plot(np.linspace(0, 2, nx), u) ax[2].set(xlabel='x', ylabel='u') image_name = str(n) + "image" + ".png" plt.savefig(image_name) plt.close()
def quad_part(self, Y, beta, lmbda, K):
    N, d = Y.shape
    if K is None:
        K = np.ones([1, N])
    return np.zeros([d, d, K.shape[0]])
import autograd.numpy as np
from autograd import grad
import scipy.stats as sp
import matplotlib.pyplot as plt


def pDist(z):
    return np.exp(-z**2) * ((1 + np.exp(-10*z - 3))**-1)


def dUdz(z, e):
    return (pDist(z + e) - pDist(z)) / e


smpls = 50000
burnin = 100000
total = smpls + burnin

z = np.empty(total)
z[0] = 0
r = np.empty(total)
r[0] = 0
accept = np.zeros(total)
ratio = np.zeros(total)
unif_RV = np.random.uniform(size=total)

eps = np.array([0.005, 0.01, 0.1, 0.2, 0.5, 1])
e = 5
eps1 = eps[e]
L = 10                      # leapfrog steps
M = np.array([1])
s = float(M[0])             # mass definition matrix
# accRatio = np.zeros((len(eps)))

for i in np.arange(0, total - 1):
    r[i] = np.random.normal(0, s)
    Kr = (1/2) * r[i]**2 * (s**(-1))   # kinetic energy
    Uz = -np.log(pDist(z[i]))          # potential energy
def e_step(sigma,X, mask_var, mask_samp,n_ul = 0): """ E step for MTL algorithm. """ n = X.shape[0] n_tr = np.size(np.where(mask_samp == False)[0])-n_ul mask_var_c = ((mask_var+1)%2).astype(bool) mask_samp_c = ((mask_samp+1)%2).astype(bool) if n_ul>0: mask_samp_c[0:n_ul] = False sigma_obs = sigma[mask_var].T[mask_var].T sigma_cond = sigma[mask_var].T[mask_var_c].T mat_prod = sigma_cond.T.dot(np.linalg.inv(sigma_obs)) mu_upd =mat_prod.dot(X[mask_samp_c].T[mask_var]) m = mask_samp_c[:,None]*mask_var_c[None,:] X[m] = mu_upd.T.flatten() #compute sigma sigma_miss = sigma[mask_var_c].T[mask_var_c].T sigma_upd = sigma_miss - mat_prod.dot(sigma_cond) #print sigma_upd if n_ul>0: mask_vul = np.copy(mask_var) mask_vul[0] = False mask_vul[1:] = True mask_vul_c = ((mask_vul+1)%2).astype(bool) sigma_obs_ul = sigma[mask_vul].T[mask_vul].T sigma_cond_ul = sigma[mask_vul].T[mask_vul_c].T mask_samp_ul = np.zeros(mask_samp.shape,dtype = bool) mask_samp_ul[0:n_ul] = True mat_prod_ul = sigma_cond_ul.T.dot(np.linalg.inv(sigma_obs_ul)) mu_upd =mat_prod_ul.dot(X[mask_samp_ul].T[mask_vul]) m = mask_samp_ul[:,None]*mask_vul_c[None,:] #X[m] =-0.65768# #X[m] = ex.flatten()# X[m] = mu_upd.T.flatten() #X[0:n_ul,0] = ex.flatten() sigma_miss_ul = sigma[mask_vul_c].T[mask_vul_c].T sigma_upd_ul = sigma_miss_ul - mat_prod_ul.dot(sigma_cond_ul) m_ul = mask_vul_c[:,None]*mask_vul_c[None,:] sigma_new = np.cov(X.T) #exit() m = mask_var_c[:,None]*mask_var_c[None,:] sigma_new[m] += sigma_upd.flatten()*n_tr/float(n) if n_ul>0: sigma_new[m_ul] +=n_ul/float(n)*sigma_upd_ul.flatten() return sigma_new
def pad_tensor(self, tensor, kernel_size):
    # An odd kernel of size 2*n + 1 requires a zero pad of n on each side
    # of both spatial dimensions.
    odd_nums = np.array([int(2*n + 1) for n in range(100)])
    pad_val = np.argwhere(odd_nums == kernel_size)[0][0]
    tensor_padded = np.zeros((np.shape(tensor)[0],
                              np.shape(tensor)[1] + 2*pad_val,
                              np.shape(tensor)[2] + 2*pad_val))
    tensor_padded[:, pad_val:-pad_val, pad_val:-pad_val] = tensor
    return tensor_padded
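# Shape sketch for pad_tensor: a (2, 5, 5) tensor with kernel_size=3 becomes
# (2, 7, 7). The method does not touch self, so None stands in for it here;
# this assumes the function is reachable as a plain callable and is purely
# illustrative.
import numpy as np
print(pad_tensor(None, np.ones((2, 5, 5)), kernel_size=3).shape)  # (2, 7, 7)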
def make_niw_natparam(n):
    if not random:
        nu, S, mu, kappa = n + 10., (n + 10.) * np.eye(n), np.zeros(n), 10.
    else:
        nu, S, mu, kappa = (n + 4. + npr.rand(), (n + npr.rand()) * np.eye(n),
                            npr.randn(n), npr.rand())
    return niw.standard_to_natural(nu, S, mu, kappa)
pp.savefig(fig)
pp.close()
plt.close()

############################ Data for prediction likelihood plot

epsilons = np.geomspace(0.1, 10, 5)
iter_sigmas = np.empty(len(epsilons))
for i, epsilon in enumerate(epsilons):
    iter_sigmas[i] = find_sigma_act(epsilon, 1e-3, T, q)[0]
# Hard-coded values overwrite the sigmas computed above.
iter_sigmas = np.array([0.27, 0.34, 0.48, 1.05, 2.0])

n_ave = 5
pred_likes = np.zeros([len(iter_sigmas), n_ave])
for i, sigma in enumerate(iter_sigmas):
    print(sigma)
    for n in range(n_ave):
        print(n)
        par = dp_advi(data, logl, logprior, T, start, C, B, eta, sigma, k)
        pred_likes[i, n] = pred_like(par, test_data, k)

np_pred_likes = []
np_T = 2000
np_eta = 0.01
for n in range(n_ave):
    print(n)
    par = dp_advi(data, logl, logprior, np_T, start, C, B, np_eta, 0, k)
    np_pred_likes.append(pred_like(par, test_data, k))
def save_images(fake_data, igp_data, real_data, out_dir, nm, dsc_params, iter): # plot igp objective, dimension 1 binsize = (max(igp_data.T[0]) - min(igp_data.T[0])) / 100 plt.hist(fake_data.T[0], bins=np.arange(min(igp_data.T[0]), max(igp_data.T[0]), binsize), color='b', alpha=0.5) plt.hist(igp_data.T[0], bins=np.arange(min(igp_data.T[0]), max(igp_data.T[0]), binsize), color='g', alpha=0.5) plt.ylabel('Histogram counts (Dimension 1)') plt.title('Generated X (blue) vs. IGP (green)') plt.savefig(out_dir + 'gan_samples_X_IGP_' + nm + '_d1.png') plt.close() # plot igp objective, dimension 2 binsize = (max(igp_data.T[1]) - min(igp_data.T[1])) / 100 plt.hist(fake_data.T[1], bins=np.arange(min(igp_data.T[1]), max(igp_data.T[1]), binsize), color='b', alpha=0.5) plt.hist(igp_data.T[1], bins=np.arange(min(igp_data.T[1]), max(igp_data.T[1]), binsize), color='g', alpha=0.5) plt.ylabel('Histogram counts (Dimension 2)') plt.title('Generated X (blue) vs. IGP (green)') plt.savefig(out_dir + 'gan_samples_X_IGP_' + nm + '_d2.png') plt.close() if iter % 10 == 0: g = sns.jointplot(x=fake_data.T[0], y=fake_data.T[1], stat_func=None, alpha=0.1) plt.sca(g.ax_joint) sns.kdeplot(real_data.T[0], real_data.T[1], ax=g.ax_joint) rmin = np.array([-61, -41]) rmax = np.array([41, 61]) dsc_bins = np.zeros((103, 103)) for _i in range(rmin[0], rmax[0]): for _j in range(rmin[1], rmax[1]): query_pt = np.array([_i, _j]) val = neural_net_predict_dsc(query_pt, dsc_params)[0] dsc_bins[_i + 61][_j + 41] = val norm = Normalize(vmin=min(dsc_bins.flatten()), vmax=max(dsc_bins.flatten())) g.ax_joint.imshow(dsc_bins, interpolation='nearest', cmap=matplotlib.cm.autumn, origin='upper', norm=norm, extent=[-51, 51, -51, 51], alpha=0.5) g.ax_marg_x.set_title('Critic: Red = Real, Yellow = Fake') plt.tight_layout() plt.savefig(out_dir + 'gan_samples_X_' + nm + '.png') plt.close() return return
def apply_poly(self, x_poly, lst_poly): res = Poly() no_neurons = len(x_poly.lw) res.lw = np.zeros(no_neurons) res.up = np.zeros(no_neurons) res.le = np.zeros([no_neurons, no_neurons + 1]) res.ge = np.zeros([no_neurons, no_neurons + 1]) if self.func == relu: for i in range(no_neurons): if x_poly.up[i] <= 0: pass elif x_poly.lw[i] >= 0: res.le[i, i] = 1 res.ge[i, i] = 1 res.lw[i] = x_poly.lw[i] res.up[i] = x_poly.up[i] else: res.le[i, i] = x_poly.up[i] / (x_poly.up[i] - x_poly.lw[i]) res.le[i, -1] = -x_poly.up[i] * x_poly.lw[i] / (x_poly.up[i] - x_poly.lw[i]) lam = 0 if x_poly.up[i] <= -x_poly.lw[i] else 1 res.ge[i, i] = lam res.lw[i] = 0 # it seems safe to set lw = 0 anyway # res.lw[i] = lam * x_poly.lw[i] # notice: mnist_relu_5_10.tf res.up[i] = x_poly.up[i] elif self.func == sigmoid: res.lw = sigmoid(x_poly.lw) res.up = sigmoid(x_poly.up) for i in range(no_neurons): if x_poly.lw[i] == x_poly.up[i]: res.le[i][-1] = res.lw[i] res.ge[i][-1] = res.lw[i] else: if x_poly.lw[i] > 0: lam1 = (res.up[i] - res.lw[i]) / (x_poly.up[i] - x_poly.lw[i]) if x_poly.up[i] <= 0: lam2 = lam1 else: ll = sigmoid( x_poly.lw[i]) * (1 - sigmoid(x_poly.lw[i])) uu = sigmoid( x_poly.up[i]) * (1 - sigmoid(x_poly.up[i])) lam2 = min(ll, uu) else: ll = sigmoid( x_poly.lw[i]) * (1 - sigmoid(x_poly.lw[i])) uu = sigmoid( x_poly.up[i]) * (1 - sigmoid(x_poly.up[i])) lam1 = min(ll, uu) if x_poly.up[i] <= 0: lam2 = (res.up[i] - res.lw[i]) / (x_poly.up[i] - x_poly.lw[i]) else: lam2 = lam1 res.ge[i, i] = lam1 res.ge[i, -1] = res.lw[i] - lam1 * x_poly.lw[i] res.le[i, i] = lam2 res.le[i, -1] = res.up[i] - lam2 * x_poly.up[i] elif self.func == tanh: res.lw = tanh(x_poly.lw) res.up = tanh(x_poly.up) for i in range(no_neurons): if x_poly.lw[i] == x_poly.up[i]: res.le[i][-1] = res.lw[i] res.ge[i][-1] = res.lw[i] else: if x_poly.lw[i] > 0: lam1 = (res.up[i] - res.lw[i]) / (x_poly.up[i] - x_poly.lw[i]) if x_poly.up[i] <= 0: lam2 = lam1 else: ll = 1 - pow(tanh(x_poly.lw[i]), 2) uu = 1 - pow(tanh(x_poly.up[i]), 2) lam2 = min(ll, uu) else: ll = 1 - pow(tanh(x_poly.lw[i]), 2) uu = 1 - pow(tanh(x_poly.up[i]), 2) lam1 = min(ll, uu) if x_poly.up[i] <= 0: lam2 = (res.up[i] - res.lw[i]) / (x_poly.up[i] - x_poly.lw[i]) else: lam2 = lam1 res.ge[i, i] = lam1 res.ge[i, -1] = res.lw[i] - lam1 * x_poly.lw[i] res.le[i, i] = lam2 res.le[i, -1] = res.up[i] - lam2 * x_poly.up[i] return res
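# Numeric sketch of the ReLU relaxation used in apply_poly above for an
# "unstable" neuron whose pre-activation bounds satisfy l < 0 < u: the upper
# constraint is y <= lam*x + mu with lam = u/(u-l) and mu = -u*l/(u-l), while
# the lower-constraint slope is 0 when u <= -l and 1 otherwise. Values are
# illustrative.
l, u = -2.0, 3.0
lam = u / (u - l)            # 0.6
mu = -u * l / (u - l)        # 1.2
lower_slope = 0 if u <= -l else 1
print(lam, mu, lower_slope)  # 0.6 1.2 1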
def ord_params_GLLVM(y_ord, nj_ord, lambda_ord_old, ps_y, pzl1_ys, zl1_s, AT,\ tol = 1E-5, maxstep = 100): ''' Determine the GLLVM coefficients related to ordinal coefficients by optimizing each column coefficients separately. y_ord (numobs x nb_ord nd-array): The ordinal data nj_ord (list of int): The number of modalities for each ord variable lambda_ord_old (list of nb_ord_j x (nj_ord + r1) elements): The ordinal coefficients of the previous iteration ps_y ((numobs, S) nd-array): p(s | y) for all s in Omega pzl1_ys (nd-array): p(z1 | y, s) zl1_s ((M1, r1, s1) nd-array): z1 | s AT ((r1 x r1) nd-array): Var(z1)^{-1/2} tol (int): Control when to stop the optimisation process maxstep (int): The maximum number of optimization step. ---------------------------------------------------------------------- returns (list of nb_ord_j x (nj_ord + r1) elements): The new ordinal coefficients ''' #**************************** # Ordinal link parameters #**************************** r0 = zl1_s.shape[1] S0 = zl1_s.shape[2] nb_ord = len(nj_ord) new_lambda_ord = [] for j in range(nb_ord): #enc = OneHotEncoder(categories='auto') enc = OneHotEncoder(categories=[list(range(nj_ord[j]))]) y_oh = enc.fit_transform(y_ord[:, j][..., n_axis]).toarray() # Define the constraints such that the threshold coefficients are ordered nb_constraints = nj_ord[j] - 2 if nb_constraints > 0: nb_params = nj_ord[j] + r0 - 1 lcs = np.full(nb_constraints, -1) lcs = np.diag(lcs, 1) np.fill_diagonal(lcs, 1) lcs = np.hstack([lcs[:nb_constraints, :], \ np.zeros([nb_constraints, nb_params - (nb_constraints + 1)])]) linear_constraint = LinearConstraint(lcs, np.full(nb_constraints, -np.inf), \ np.full(nb_constraints, 0), keep_feasible = True) opt = minimize(ord_loglik_j, lambda_ord_old[j] ,\ args = (y_oh, zl1_s, S0, ps_y, pzl1_ys, nj_ord[j]), tol = tol, method='trust-constr', jac = ord_grad_j, \ constraints = linear_constraint, hess = '2-point',\ options = {'maxiter': maxstep}) else: # For Nj = 2, only 2 - 1 = 1 intercept coefficient: no constraint opt = minimize(ord_loglik_j, lambda_ord_old[j], \ args = (y_oh, zl1_s, S0, ps_y, pzl1_ys, nj_ord[j]), \ tol = tol, method='BFGS', jac = ord_grad_j, options = {'maxiter': maxstep}) res = opt.x if not (opt.success ): # If the program fail, keep the old estimate as value res = lambda_ord_old[j] warnings.warn('One of the ordinal optimisations has failed', RuntimeWarning) # Ensure identifiability for Lambda_j new_lambda_ord_j = (res[-r0:].reshape(1, r0) @ AT[0]).flatten() new_lambda_ord_j = np.hstack( [deepcopy(res[:nj_ord[j] - 1]), new_lambda_ord_j]) new_lambda_ord.append(new_lambda_ord_j) return new_lambda_ord
def init_niw_natparam(N):
    nu, S, m, kappa = (N + niw_conc, (N + niw_conc) * np.eye(N),
                       np.zeros(N), niw_conc)
    m = m + random_scale * npr.randn(*m.shape)
    return niw.standard_to_natural(S, m, kappa, nu)
if 3 in parts:
    print('PART 3')

    # c/d
    def forward(x, w):
        p = np.exp(np.dot(x, w))
        p = p / p.sum(axis=1, keepdims=True)
        return p

    def ce_grad_mean(x, y, p):
        return 1 / len(x) * np.dot(x.T, (p - y))

    epochs = 50
    batch_size = 32
    w = np.zeros((784, 10))
    for _ in tqdm(range(epochs)):
        for batch in range(0, len(train_images), batch_size):
            x = train_images[batch:batch + batch_size]
            y = train_labels[batch:batch + batch_size]
            p = forward(x, w)
            w -= 0.001 * ce_grad_mean(x, y, p)

    p_train = forward(train_images, w)
    p_test = forward(test_images, w)
    avg_logp_train = average_logp(p_train, train_labels)
    avg_logp_test = average_logp(p_test, test_labels)
    avg_acc_train = average_accuracy(p_train, train_labels)
    avg_acc_test = average_accuracy(p_test, test_labels)
    metrics = [
        'Average training log likelihood: %g' % avg_logp_train,
        'Average test log likelihood: %g' % avg_logp_test,
def hom_2d_to_3d(pts):
    # Insert a row of zeros (z = 0) as the third coordinate of the
    # column-wise point array.
    pts = np.insert(pts, 2, np.zeros(pts.shape[1]), 0)
    return pts
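# Usage sketch for hom_2d_to_3d, assuming points are stored column-wise as
# homogeneous 2D coordinates [x; y; w] (shape 3 x N): a zero z-row is
# inserted, giving [x; y; 0; w].
import numpy as np
print(hom_2d_to_3d(np.array([[1.0, 4.0], [2.0, 5.0], [1.0, 1.0]])))
# [[1. 4.]
#  [2. 5.]
#  [0. 0.]
#  [1. 1.]]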
# Initialize weights
rs = npr.RandomState()
W = rs.randn(N_weights) * param_scale

print(" Epoch | Train err | Test error ")

def print_perf(epoch, W):
    test_perf = frac_err(W, test_musics, test_labels)
    train_perf = frac_err(W, train_musics, train_labels)
    print("{0:15}|{1:15}|{2:15}".format(epoch, train_perf, test_perf))

# Train with sgd
batch_idxs = make_batches(N_data, batch_size)
cur_dir = np.zeros(N_weights)

for epoch in range(num_epochs):
    print_perf(epoch, W)
    for idxs in batch_idxs:
        grad_W = loss_grad(W, train_musics[idxs], train_labels[idxs])
        cur_dir = momentum * cur_dir + (1.0 - momentum) * grad_W
        W -= learning_rate * cur_dir

test_perf = frac_err(W, test_musics, test_labels)
if test_perf < test_perf_optim:
    # updating optimal parameters
    print('NEW OPTIMUM')
    print(L2_reg, neurone_number, learning_rate)
    test_perf_optim = test_perf
    params_optim = L2_reg, neurone_number, learning_rate
def sample(self, T, prefix=None, input=None, tag=None, with_noise=True): """ Sample synthetic data from the model. Optionally, condition on a given prefix (preceding discrete states and data). Parameters ---------- T : int number of time steps to sample prefix : (zpre, xpre) Optional prefix of discrete states (zpre) and continuous states (xpre) zpre must be an array of integers taking values 0...num_states-1. xpre must be an array of the same length that has preceding observations. input : (T, input_dim) array_like Optional inputs to specify for sampling tag : object Optional tag indicating which "type" of sampled data with_noise : bool Whether or not to sample data with noise. Returns ------- z_sample : array_like of type int Sequence of sampled discrete states x_sample : (T x observation_dim) array_like Array of sampled data """ K = self.K D = (self.D, ) if isinstance(self.D, int) else self.D M = (self.M, ) if isinstance(self.M, int) else self.M assert isinstance(D, tuple) assert isinstance(M, tuple) assert T > 0 # Check the inputs if input is not None: assert input.shape == (T, ) + M # Get the type of the observations dummy_data = self.observations.sample_x(0, np.empty(0, ) + D) dtype = dummy_data.dtype # Initialize the data array if prefix is None: # No prefix is given. Sample the initial state as the prefix. pad = 1 z = np.zeros(T, dtype=int) data = np.zeros((T, ) + D, dtype=dtype) input = np.zeros((T, ) + M) if input is None else input mask = np.ones((T, ) + D, dtype=bool) # Sample the first state from the initial distribution pi0 = self.init_state_distn.initial_state_distn z[0] = npr.choice(self.K, p=pi0) data[0] = self.observations.sample_x(z[0], data[:0], input=input[0], with_noise=with_noise) # We only need to sample T-1 datapoints now T = T - 1 else: # Check that the prefix is of the right type zpre, xpre = prefix pad = len(zpre) assert zpre.dtype == int and zpre.min() >= 0 and zpre.max() < K assert xpre.shape == (pad, ) + D # Construct the states, data, inputs, and mask arrays z = np.concatenate((zpre, np.zeros(T, dtype=int))) data = np.concatenate((xpre, np.zeros((T, ) + D, dtype))) input = np.zeros((T + pad, ) + M) if input is None else np.concatenate( (np.zeros((pad, ) + M), input)) mask = np.ones((T + pad, ) + D, dtype=bool) # Convert the discrete states to the range (1, ..., K_total) m = self.state_map K_total = len(m) _, starts = np.unique(m, return_index=True) z = starts[z] # Fill in the rest of the data for t in range(pad, pad + T): Pt = self.transitions.transition_matrices(data[t - 1:t + 1], input[t - 1:t + 1], mask=mask[t - 1:t + 1], tag=tag)[0] z[t] = npr.choice(K_total, p=Pt[z[t - 1]]) data[t] = self.observations.sample_x(m[z[t]], data[:t], input=input[t], tag=tag, with_noise=with_noise) # Collapse the states z = m[z] # Return the whole data if no prefix is given. # Otherwise, just return the simulated part. if prefix is None: return z, data else: return z[pad:], data[pad:]
def sample(self, n_samples=2000, observed_states=None, random_state=None): """Generate random samples from the self. Parameters ---------- n : int Number of samples to generate. observed_states : array If provided, states are not sampled. random_state: RandomState or an int seed A random number generator instance. If None is given, the object's random_state is used Returns ------- samples : array_like, length (``n_samples``) List of samples states : array_like, shape (``n_samples``) List of hidden states (accounting for tied states by giving them the same index) """ if random_state is None: random_state = self.random_state random_state = check_random_state(random_state) samples = np.zeros(n_samples) states = np.zeros(n_samples) if observed_states is None: startprob_pdf = np.exp(np.copy(self._log_startprob)) startdist = stats.rv_discrete( name='custm', values=(np.arange(startprob_pdf.shape[0]), startprob_pdf), seed=random_state) states[0] = startdist.rvs(size=1)[0] transmat_pdf = np.exp(np.copy(self._log_transmat)) transmat_cdf = np.cumsum(transmat_pdf, 1) nrand = random_state.rand(n_samples) for idx in range(1, n_samples): newstate = (transmat_cdf[states[idx - 1]] > nrand[idx - 1]).argmax() states[idx] = newstate else: states = observed_states mu = np.copy(self._mu_) precision = np.copy(self._precision_) for idx in range(n_samples): mean_ = self._mu_[states[idx]] var_ = np.sqrt(1 / precision[states[idx]]) samples[idx] = norm.rvs(loc=mean_, scale=var_, size=1, random_state=random_state) states = self._process_sequence(states) return samples, states
def get_pqsource(prob_label): """ Return (p, ds), a tuple of - p: a Density representing the distribution p - ds: a DataSource, each corresponding to one parameter setting. The DataSource generates sample from q. """ prob2tuples = { # H0 is true. vary d. P = Q = N(0, I) 'sg5': (density.IsotropicNormal(np.zeros(5), 1), data.DSIsotropicNormal(np.zeros(5), 1)), # P = N(0, I), Q = N( (0.2,..0), I) 'gmd5': (density.IsotropicNormal(np.zeros(5), 1), data.DSIsotropicNormal(np.hstack((0.2, np.zeros(4))), 1)), 'gmd1': (density.IsotropicNormal(np.zeros(1), 1), data.DSIsotropicNormal(np.ones(1) * 0.2, 1)), # P = N(0, I), Q = N( (1,..0), I) 'gmd100': (density.IsotropicNormal(np.zeros(100), 1), data.DSIsotropicNormal(np.hstack((1, np.zeros(99))), 1)), # Gaussian variance difference problem. Only the variance # of the first dimenion differs. d varies. 'gvd5': (density.Normal(np.zeros(5), np.eye(5)), data.DSNormal(np.zeros(5), np.diag(np.hstack( (2, np.ones(4)))))), 'gvd10': (density.Normal(np.zeros(10), np.eye(10)), data.DSNormal(np.zeros(10), np.diag(np.hstack((2, np.ones(9)))))), # Gaussian Bernoulli RBM. dx=50, dh=10. H0 is true 'gbrbm_dx50_dh10_v0': gaussbern_rbm_tuple(0, dx=50, dh=10, n=sample_size), # Gaussian Bernoulli RBM. dx=5, dh=3. H0 is true 'gbrbm_dx5_dh3_v0': gaussbern_rbm_tuple(0, dx=5, dh=3, n=sample_size), # Gaussian Bernoulli RBM. dx=50, dh=10. 'gbrbm_dx50_dh10_v1em3': gaussbern_rbm_tuple(1e-3, dx=50, dh=10, n=sample_size), # Gaussian Bernoulli RBM. dx=5, dh=3. Perturb with noise = 1e-2. 'gbrbm_dx5_dh3_v5em3': gaussbern_rbm_tuple(5e-3, dx=5, dh=3, n=sample_size), # Gaussian mixture of two components. Uniform mixture weights. # p = 0.5*N(0, 1) + 0.5*N(3, 0.01) # q = 0.5*N(-3, 0.01) + 0.5*N(0, 1) 'gmm_d1': (density.IsoGaussianMixture(np.array([[0], [3.0]]), np.array([1, 0.01])), data.DSIsoGaussianMixture(np.array([[-3.0], [0]]), np.array([0.01, 1]))), # p = N(0, 1) # q = 0.1*N([-10, 0,..0], 0.001) + 0.9*N([0,0,..0], 1) 'g_vs_gmm_d5': (density.IsotropicNormal(np.zeros(5), 1), data.DSIsoGaussianMixture(np.vstack((np.hstack( (0.0, np.zeros(4))), np.zeros(5))), np.array([0.0001, 1]), pmix=[0.1, 0.9])), 'g_vs_gmm_d2': (density.IsotropicNormal(np.zeros(2), 1), data.DSIsoGaussianMixture(np.vstack((np.hstack( (0.0, np.zeros(1))), np.zeros(2))), np.array([0.01, 1]), pmix=[0.1, 0.9])), 'g_vs_gmm_d1': (density.IsotropicNormal(np.zeros(1), 1), data.DSIsoGaussianMixture(np.array([[0.0], [0]]), np.array([0.01, 1]), pmix=[0.1, 0.9])), } if prob_label not in prob2tuples: raise ValueError('Unknown problem label. Need to be one of %s' % str(list(prob2tuples.keys()))) return prob2tuples[prob_label]
def adam_minimax(grad_both, init_params_max, init_params_min, callback=None, num_iters=100, step_size_max=0.001, step_size_min=0.001, b1=0.9, b2=0.999, eps=10**-8): """Adam modified to do minimiax optimization, for instance to help with training generative adversarial networks.""" def exponential_decay(step_size_max): if step_size_max > 0.001: step_size_max *= 0.999 return step_size_max x_max, unflatten_max = flatten(init_params_max) x_min, unflatten_min = flatten(init_params_min) m_max = np.zeros(len(x_max)) v_max = np.zeros(len(x_max)) m_min = np.zeros(len(x_min)) v_min = np.zeros(len(x_min)) for i in range(num_iters): print(i, datetime.datetime.now(), alphabetize(int(i / 10))) K = 3 g_max_uf, g_min_uf = grad_both(unflatten_max(x_max), unflatten_min(x_min), i) g_max, _ = flatten(g_max_uf) g_min, _ = flatten(g_min_uf) if callback: callback(unflatten_max(x_max), unflatten_min(x_min), i, unflatten_max(g_max), unflatten_min(g_min)) step_size_max = exponential_decay(step_size_max) m_max = (1 - b1) * g_max + b1 * m_max # First moment estimate. v_max = (1 - b2) * (g_max**2) + b2 * v_max # Second moment estimate. mhat_max = m_max / (1 - b1**(i + 1)) # Bias correction. vhat_max = v_max / (1 - b2**(i + 1)) x_max = x_max + step_size_max * mhat_max / (np.sqrt(vhat_max) + eps) m_min = (1 - b1) * g_min + b1 * m_min # First moment estimate. v_min = (1 - b2) * (g_min**2) + b2 * v_min # Second moment estimate. mhat_min = m_min / (1 - b1**(i + 1)) # Bias correction. vhat_min = v_min / (1 - b2**(i + 1)) x_min = x_min - step_size_min * mhat_min / (np.sqrt(vhat_min) + eps) for k in range(K - 1): if k <= 0: step_size_min_temp = step_size_min if k > 0: step_size_min_temp = step_size_min_temp * 0.50 g_max_uf, g_min_uf = grad_both(unflatten_max(x_max), unflatten_min(x_min), i) g_min, _ = flatten(g_min_uf) # Update discriminator (minimizer) m_min = (1 - b1) * g_min + b1 * m_min # First moment estimate. v_min = (1 - b2) * (g_min** 2) + b2 * v_min # Second moment estimate. mhat_min = m_min / (1 - b1**(i + 1)) # Bias correction. vhat_min = v_min / (1 - b2**(i + 1)) x_min = x_min - step_size_min_temp * mhat_min / ( np.sqrt(vhat_min) + eps) return unflatten_max(x_max), unflatten_min(x_min)
def Magnus_Integrator(t_start, t_stop, h0, x0, A, alpha, type_, order): T_0 = time.time() """ x0 = initial conditions t_start = start time t_stop = end time h0 = initial step size A = A(t) matrix function alpha = alpha generating function order = 4 or 6 also support Caley methods """ Ndim = x0.size x_ = np.zeros((1, Ndim)) # set up the array of x values t_ = np.zeros(1) # set up the array of t values t_[0] = t_start x_[0,:] = x0 h = h0 h_min = h0*(10**(-2)) h_max = 5*h0 n = 0 t = t_start # S = 0.99 # safety factor # if type_ == "Magnus": def M(time, hstep): M_ = linalg.expm(Omega(time, time+hstep, A, alpha, order)) return M_ elif type_ == "Caley": def M(time, hstep): Id = np.identity(Ndim) Om = Omega(time, time+hstep, A, alpha, order) if order == 4: C_ = Om*(Id - (1/12)*(Om**2)) elif order ==6: C_ = Om*(Id - (1/12)*(Om**2)*(1 - (1/10)*(Om**2))) M_ = np.linalg.inv(Id - 0.5*C_)*(Id + 0.5*C_) return M_ # while t <= t_stop: x_n = x_[n,:].reshape(Ndim, 1) Err_small = False h_new = h while Err_small == False: # compute the predictions using one step of h & two steps of h/2 #print("\r" + "trying step " + str(n) + " h=" + str(h) + " ...", end='') x_np1_0 = M(t, h) @ x_n x_np1_l = M(t+0.5*h, 0.5*h) @ (M(t, 0.5*h) @ x_n) # compute error Err = ferr(x_np1_0, x_np1_l) Err_max = epsilon*(rtol*np.abs(x_np1_l) + atol) #h*(A(t) @ x_n)[0,0]) # maximum error allowed Err_ratio = np.abs(np.std(Err / Err_max)) # if Err_ratio <= 1: h_new = h*S*np.power(np.abs(Err_ratio), -1.0/(order + 1)) # h*1.5 if h_new > 10*h: # limit how fast the step size can increase h_new = 10*h if h_new > h_max: # limit the maximum step size h_new = h_max Err_small = True # break loop elif Err_ratio > 1: h_new = h*S*np.power(np.abs(Err_ratio), -1.0/(order)) if h_new < 0.2*h: # limit how fast the step size decreases h_new = 0.2*h if h_new < h_min: # limit the minimum step size h_new = h_min Err_small = True # break loop elif h_new >= h_min: h = h_new t = t + h x_ = np.vstack((x_,x_np1_l.reshape(1, Ndim))) # add x_n+1 to the array of x values t_ = np.append(t_, t) # add t_n+1 to the array of t values n = n + 1 h = h_new if True: #np.round(((t-t_start)/(t_stop-t_start))*100000) % 1000 == 0: print("\r" + "integrated {:.1%}".format(float((t-t_start)/(t_stop-t_start))), end='') T = time.time() - T_0 print(" done in {:.5g}s".format(T)) return (t_, x_, T)
def RKF45_Integrator(t_start, t_stop, h0, x0, A):
    # An integrator using a 4(5) RKF method
    """
    x0      = initial conditions
    t_start = start time
    t_stop  = end time
    h0      = initial step size
    A       = A(t) matrix function
    """
    T_0 = time.time()

    Ndim = x0.size
    x_ = np.zeros((1, Ndim))  # set up the array of x values
    t_ = np.zeros(1)          # set up the array of t values
    t_[0] = t_start
    x_[0, :] = x0
    h = h0
    h_min = h0*(10**(-2))
    h_max = 5*h0
    n = 0
    t = t_start

    S = 0.98  # safety factor

    while t <= t_stop:
        x_n = x_[n, :].reshape(Ndim, 1)
        Err_small = False
        h_new = h
        while Err_small == False:
            # compute the predictions using 4th and 5th order RK methods
            k1 = np.dot(h*A(t), x_n)
            k2 = h*A(t + 0.25*h) @ (x_n + 0.25*k1)
            k3 = h*A(t + (3/8)*h) @ (x_n + (3/32)*k1 + (9/32)*k2)
            k4 = h*A(t + (12/13)*h) @ (x_n + (1932/2197)*k1 - (7200/2197)*k2 + (7296/2197)*k3)
            k5 = h*A(t + h) @ (x_n + (439/216)*k1 - 8*k2 + (3680/513)*k3 - (845/4104)*k4)
            k6 = h*A(t + 0.5*h) @ (x_n - (8/27)*k1 + 2*k2 - (3544/2565)*k3 + (1859/4104)*k4 - (11/40)*k5)
            # 4th-order solution (standard Fehlberg weights)
            y_np1 = x_n + (25/216)*k1 + (1408/2565)*k3 + (2197/4104)*k4 - (1/5)*k5
            # 5th-order solution
            z_np1 = x_n + (16/135)*k1 + (6656/12825)*k3 + (28561/56430)*k4 - (9/50)*k5 + (2/55)*k6
            # error estimate and maximum error allowed: Err_max = eps*(rtol*|z_np1| + atol)
            Err = ferr(y_np1, z_np1)
            Err_max = epsilon_RK*(rtol_RK*np.abs(z_np1) + atol_RK)
            Err_ratio = float(np.mean(Err / Err_max))
            if Err_ratio <= 1:
                h_new = h*S*np.power(Err_ratio, -1.0/5)
                #Delta = max(np.asscalar(max(Err)), epsilon_RK*0.1)
                #h_new = h*(epsilon_RK*h/Delta)**(1/4)
                if h_new > 10*h:   # limit how fast the step size can increase
                    h_new = 10*h
                if h_new > h_max:  # limit the maximum step size
                    h_new = h_max
                Err_small = True   # accept the step
            elif Err_ratio > 1:
                h_new = h*S*np.power(np.abs(Err_ratio), -1.0/4)
                #h_new = h*(epsilon_RK*h/np.asscalar(max(Err)))**(1/4)
                if h_new < 0.2*h:  # limit how fast the step size decreases
                    h_new = 0.2*h
                if h_new < h_min:  # limit the minimum step size
                    h_new = h_min
                    Err_small = True  # accept the step at the minimum size
                elif h_new >= h_min:
                    h = h_new         # retry with the smaller step
        t = t + h
        x_ = np.vstack((x_, z_np1.reshape(1, Ndim)))  # add x_n+1 to the array of x values
        t_ = np.append(t_, t)                         # add t_n+1 to the array of t values
        n = n + 1
        h = h_new
        if True:  # np.round(((t-t_start)/(t_stop-t_start))*100000) % 1000 == 0:
            print("\r" + "integrated {:.1%}".format((t-t_start)/(t_stop-t_start)), end='')
    T = time.time() - T_0
    print(" done in {:.5g}s".format(T))
    return (t_, x_, T)
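A minimal usage sketch. The integrator relies on module-level names (ferr and the epsilon_RK/rtol_RK/atol_RK tolerances) that are defined elsewhere in the original; the versions below are assumptions, and the harmonic-oscillator A(t) is purely illustrative.

import time
import numpy as np

ferr = lambda y, z: np.abs(y - z)             # assumed form of the local error estimate
epsilon_RK, rtol_RK, atol_RK = 1.0, 1e-6, 1e-9

A = lambda t: np.array([[0.0, 1.0], [-1.0, 0.0]])   # x' = A x, a harmonic oscillator
x0 = np.array([1.0, 0.0])

t_vals, x_vals, T_run = RKF45_Integrator(0.0, 10.0, 0.01, x0, A)
print(x_vals.shape)   # (number of accepted steps + 1, 2)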
def _do_mstep(self, stats, params):
    # M-Step for startprob and transmat
    if 's' in params:
        startprob_ = self.startprob_prior + stats['start']
        normalize(startprob_)
        self.startprob_ = np.where(self.startprob_ <= np.finfo(float).eps,
                                   self.startprob_, startprob_)

    if 't' in params:
        if self.n_tied == 0:
            transmat_ = self.transmat_prior + stats['trans']
            normalize(transmat_, axis=1)
            self.transmat_ = np.where(self.transmat_ <= np.finfo(float).eps,
                                      self.transmat_, transmat_)
        else:
            transmat_ = np.zeros((self.n_components, self.n_components))
            transitionCnts = stats['trans'] + self.transmat_prior
            transition_index = [i * self.n_chain for i in range(self.n_unique)]

            for b in range(self.n_unique):
                block = \
                    transitionCnts[self.n_chain * b : self.n_chain * (b + 1)][:] + 0.

                denominator_diagonal = np.sum(block)
                diagonal = 0.0

                index_line = range(0, self.n_chain)
                index_row = range(self.n_chain * b, self.n_chain * (b + 1))

                for l, r in zip(index_line, index_row):
                    diagonal += block[l][r]
                for l, r in zip(index_line, index_row):
                    block[l][r] = diagonal / denominator_diagonal

                self_transition = block[0][self.n_chain * b]
                denominator_off_diagonal = \
                    (np.sum(block[self.n_chain - 1])) - self_transition
                template = block[self.n_chain - 1] + 0.

                for entry in range(len(template)):
                    template[entry] = (template[entry] * (1 - self_transition)) \
                        / float(denominator_off_diagonal)

                template[(self.n_chain * (b + 1)) - 1] = 0.
                line_value = 1 - self_transition
                for entry in range(len(template)):
                    line_value = line_value - template[entry]

                for index in transition_index:
                    if index != (b * self.n_chain):
                        block[self.n_chain - 1][index] = \
                            line_value + template[index]

                line = range(self.n_chain - 1)
                row = [b * self.n_chain + i for i in range(1, self.n_chain)]
                for x, y in zip(line, row):
                    block[x][y] = 1 - self_transition

                transmat_[self.n_chain * b:self.n_chain * (b + 1)][:] = block

            self.transmat_ = np.copy(transmat_)
def ReLU(x): return np.maximum(x, np.zeros(1))
def outputs(weights, input_set, fence_set, output_set=None, return_pred_set=False):
    update_x_weights = parser.get(weights, 'update_x_weights')
    update_h_weights = parser.get(weights, 'update_h_weights')
    reset_x_weights = parser.get(weights, 'reset_x_weights')
    reset_h_weights = parser.get(weights, 'reset_h_weights')
    thidden_x_weights = parser.get(weights, 'thidden_x_weights')
    thidden_h_weights = parser.get(weights, 'thidden_h_weights')
    output_h_weights = parser.get(weights, 'output_h_weights')

    data_count = len(fence_set) - 1
    feat_count = input_set.shape[0]
    output_count = output_h_weights.shape[1]  # number of output units, inferred from the output weights

    ll = 0.0
    n_i_track = 0
    fence_base = fence_set[0]

    pred_set = None
    if return_pred_set:
        pred_set = np.zeros((output_count, input_set.shape[1]))

    # loop through sequences and time steps
    for data_iter in range(data_count):
        hiddens = copy(parser.get(weights, 'init_hiddens'))

        fence_post_1 = fence_set[data_iter] - fence_base
        fence_post_2 = fence_set[data_iter + 1] - fence_base
        time_count = fence_post_2 - fence_post_1
        curr_input = input_set[:, fence_post_1:fence_post_2]

        for time_iter in range(time_count):
            hiddens = update(
                np.expand_dims(np.hstack((curr_input[:, time_iter], 1)), axis=0),
                hiddens,
                update_x_weights, update_h_weights,
                reset_x_weights, reset_h_weights,
                thidden_x_weights, thidden_h_weights)

            # IF WE WANT PREDICTION, WE HAVE TO TURN SIGMOID TO LINEAR
            if output_set is not None:
                # subtract a small number so -1
                out_proba = sigmoid(
                    np.sign(output_set[:, n_i_track] - 1e-3) *
                    np.dot(hiddens, output_h_weights))
                out_lproba = safe_log(out_proba)
                ll += np.sum(out_lproba)
            else:
                out_proba = sigmoid(np.dot(hiddens, output_h_weights))
                out_lproba = safe_log(out_proba)
            # (A commented-out variant replaced sigmoid() with linear() in the
            #  two branches above to obtain raw predictions instead of probabilities.)

            if return_pred_set:
                pred_set[:, n_i_track] = out_lproba[0]
            n_i_track += 1

    return ll, pred_set
def plot_r_and_t_Omega():
    # Pick some incommensurate spacing between the atoms.
    # This will ensure that no atoms will be exactly on
    # the nodes of the standing wave.
    kd = 0.5 * np.pi

    # We treat Delta, Deltac, g1d and gprime here as if they were scaled by the
    # total decay rate \Gamma = \Gamma_{1D} + \Gamma'. Thus Delta is actually
    # Delta/\Gamma etc.
    min_Omega = 1
    max_Omega = 100
    num_Omega = 1000
    NAtoms = 10000
    g1d = 0.05
    # Note that \Gamma'/\Gamma = 1 - \Gamma_{1D}/\Gamma.
    gprime = 1 - g1d
    gm = 0
    Deltac = -10

    usetex()
    p.figure(figsize=(2 * 3.3, 2.5))

    OmegaValues = np.linspace(min_Omega, max_Omega, num_Omega)
    r = np.zeros(num_Omega, dtype=np.float64)
    t = np.zeros(num_Omega, dtype=np.float64)
    r_imp = np.zeros(num_Omega, dtype=np.float64)
    t_imp = np.zeros(num_Omega, dtype=np.float64)
    #r_diff2 = np.zeros(num_Omega, dtype=np.complex128)
    t_diff2 = np.zeros(num_Omega, dtype=np.complex128)

    deltaStart = 0.0025
    for n, Omega in enumerate(OmegaValues):
        delta = delta_for_minimal_r_0(deltaStart, NAtoms, kd, g1d, gprime,
                                      gm, Deltac, Omega)
        deltaStart = delta
        Delta = delta + Deltac

        #r_0_func = lambda x: r_0(x, NAtoms, kd, g1d, gprime, gm, Deltac, Omega)
        #r_0_grad1 = grad(r_0_func)
        #r_0_grad2 = grad(r_0_grad1)
        t_0_func = lambda x: t_0(x, NAtoms, kd, g1d, gprime, gm, Deltac, Omega)
        t_0_grad1 = grad(t_0_func)
        t_0_grad2 = grad(t_0_grad1)

        MensembleStandingWave \
            = ensemble_transfer_matrix_pi_half(NAtoms, g1d, gprime, gm,
                                               Delta, Deltac, Omega)
        MHalfEnsembleStandingWave \
            = ensemble_transfer_matrix_pi_half(NAtoms/2, g1d, gprime, gm,
                                               Delta, Deltac, Omega)
        M_impurity_cell = impurity_unit_cell_pi_half(g1d, gprime, gm,
                                                     Delta, Deltac, Omega)
        MensembleStandingWaveImpurity = MHalfEnsembleStandingWave \
            * M_impurity_cell * MHalfEnsembleStandingWave

        r[n] = np.abs(-MensembleStandingWave[1, 0]
                      / MensembleStandingWave[1, 1])**2
        t[n] = np.abs(1.0 / MensembleStandingWave[1, 1])**2
        r_imp[n] = np.abs(-MensembleStandingWaveImpurity[1, 0]
                          / MensembleStandingWaveImpurity[1, 1])**2
        t_imp[n] = np.abs(1.0 / MensembleStandingWaveImpurity[1, 1])**2
        #r_diff2[n] = r_0_grad2(delta)
        t_diff2[n] = t_0_grad2(delta)

    t_analytical = (1 - (g1d * (1 - g1d) * NAtoms) / (16 * Deltac**2)
                    - (1 - g1d) * OmegaValues**2 * np.pi**2
                    / (2 * Deltac**2 * g1d * NAtoms))**2
    r_imp_analytical = (1 - 4 * np.pi**2 * Deltac**2 * (1 - g1d)
                        / (g1d**3 * NAtoms**2)
                        + 32 * np.pi**4 * Deltac**2 * (1 - g1d) * OmegaValues**2
                        / (g1d**5 * NAtoms**4))**2
    w_analytical = 32 * np.sqrt(2) * Deltac**2 * OmegaValues**2 * np.pi**2 \
        / (g1d**3 * NAtoms**3)

    ax = p.subplot(121)
    #p.plot(OmegaValues, r, color='#000099', linestyle='-',
    #       label=r'$|r_0|^2$', linewidth=common_line_width)
    p.plot(OmegaValues, t, color='#009900', linestyle=':',
           label=r'$|t_0|^2$ (full)', linewidth=common_line_width)
    p.plot(OmegaValues, t_analytical - t_analytical[0] + t[0],
           color='k', linestyle='-.',
           label=r'$|t_0|^2$ (approx)', linewidth=common_line_width)
    p.plot(OmegaValues, r_imp, color='#990000', linestyle='--',
           label=r'$|r_1|^2$ (full)', linewidth=common_line_width)
    p.plot(OmegaValues, r_imp_analytical - r_imp_analytical[0] + r_imp[0],
           color='#000099', linestyle='-',
           label=r'$|r_1|^2$ (approx)', linewidth=common_line_width)
    #p.plot(OmegaValues, t_imp, 'k-.',
    #       label=r'$|t_1|^2$', linewidth=common_line_width)
    p.xlim(min_Omega, max_Omega)
    p.ylim(0.0, 1)
    p.xlabel(r'$\Omega_0/\Gamma$')
    p.title('(a)')
    p.legend(loc='lower right')

    ax = p.subplot(122)
    # Near resonance r(\delta) = r(\delta_{res}) + (2/w^2)(\delta - \delta_{res})^2.
    # Hence the second derivative (d^2/d\delta^2) r(\delta) at \delta = \delta_{res}
    # is equal to 4/w^2.
    # Upon solving r_diff2 = 4/w^2, we get w = \sqrt{4/r_diff2}.
    p.loglog(OmegaValues, np.real(np.sqrt(4.0 / t_diff2)),
             color='#009900', linestyle=':',
             label=r"full", linewidth=common_line_width)
    p.loglog(OmegaValues, w_analytical, color='k', linestyle='-.',
             label=r"approximate", linewidth=common_line_width)
    p.ylabel(r'$w/\Gamma$')
    p.xlabel(r'$\Omega_0/\Gamma$')
    p.legend(loc='lower right')
    p.title('(b)')
    p.tight_layout(pad=0.2)
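For reference, the width plotted in panel (b) restates the quadratic expansion quoted in the comments above, applied here to the transmission coefficient via t_diff2; this is only a transcription of the code's own reasoning, not an additional result:

    t(\delta) \approx t(\delta_{\mathrm{res}}) + \frac{2}{w^2}\,(\delta - \delta_{\mathrm{res}})^2
    \;\Longrightarrow\;
    \left.\frac{\mathrm{d}^2 t}{\mathrm{d}\delta^2}\right|_{\delta_{\mathrm{res}}} = \frac{4}{w^2},
    \qquad
    w = \sqrt{4 \Big/ \frac{\mathrm{d}^2 t}{\mathrm{d}\delta^2}} .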
ALPHA_BY_2 = PI_2 * -2.36e-1
KAPPA_BY_2 = PI_2 * -3.7e-6
CHIP_BY_2 = PI_2 * -1.9e-6
T1_T = 1.7e5  # ns
TP_T = 4.3e4
T1_C = 2.7e6

# Second, we define the system.
CAVITY_STATE_COUNT = 2
TRANSMON_STATE_COUNT = 2
HILBERT_SIZE = CAVITY_STATE_COUNT * TRANSMON_STATE_COUNT

A = get_annihilation_operator(CAVITY_STATE_COUNT)
A_DAGGER = get_creation_operator(CAVITY_STATE_COUNT)
A_ID = anp.eye(CAVITY_STATE_COUNT)
# Notice how the state vectors are specified as column vectors.
CAVITY_VACUUM = anp.zeros((CAVITY_STATE_COUNT, 1))
CAVITY_ZERO = anp.copy(CAVITY_VACUUM)
CAVITY_ZERO[0, 0] = 1
CAVITY_ONE = anp.copy(CAVITY_VACUUM)
CAVITY_ONE[1, 0] = 1

B = get_annihilation_operator(TRANSMON_STATE_COUNT)
B_DAGGER = get_creation_operator(TRANSMON_STATE_COUNT)
B_ID = anp.eye(TRANSMON_STATE_COUNT)
TRANSMON_VACUUM = anp.zeros((TRANSMON_STATE_COUNT, 1))
TRANSMON_ZERO = anp.copy(TRANSMON_VACUUM)
TRANSMON_ZERO[0, 0] = 1
TRANSMON_ONE = anp.copy(TRANSMON_VACUUM)
TRANSMON_ONE[1, 0] = 1

# Next, we define the system hamiltonian.
# qoc requires you to specify your hamiltonian as a function of control parameters.
def diffusion_resampling(process, return_full_path=True, verbose=False,
                         domain_enforcer=None):
    """Returns the paths of the particles in the format:
    total_iter, num_particles, tau, dim"""
    p_start = get_particles(process)
    p_gamma = lambda t: process["gamma"]
    p_temperature = lambda t: process["temperature"]
    p_num_particles = len(p_start)
    p_epsilon = process["epsilon"]
    total_iter, tau = process["total_iter"], process["tau"]
    dim = len(p_start[0])  # dimensionality of each particle

    # get potential function and its gradient
    U, grad_U = get_potential(process)
    # get weight function
    p_weight_func = get_weight_function(process)
    # get resample function
    p_resample_func = get_resample_function(process)

    # get domain enforcer
    x_range = process["x_range"]
    if process["domain_enforcer"]["name"] == "hyper_cube_enforcer":
        domain_enforcer_strength = process["domain_enforcer"]["params"]["strength"]
        domain_enforcer = hyper_cube_enforcer(x_range[0], x_range[1],
                                              domain_enforcer_strength)
    else:
        raise ValueError("Does not support given domain enforcer {}".format(
            process["domain_enforcer"]["name"]))

    # init particles, weights, and paths
    all_paths = []
    p_weights = np.zeros(len(p_start))
    curr_paths = np.array([[np.array(p)] for p in p_start])

    # Which t to use for diffusion?
    for t in range(total_iter):
        for t_tau in range(tau):
            # --- diffusion step ---
            x_curr = np.array(curr_paths[:, -1])
            x_next = x_curr + p_gamma(t) * (-grad_U(x_curr.T).T) \
                + p_temperature(t) * np.random.normal(size=x_curr.shape)
            if domain_enforcer is not None:
                x_next, went_outside_domain = domain_enforcer(x_next)
            # ----
            if return_full_path or (curr_paths.shape[1] == 1):
                curr_paths = np.concatenate([
                    curr_paths,
                    x_next.reshape([curr_paths.shape[0], 1, curr_paths.shape[2]])
                ], axis=1)
            else:
                curr_paths[:, -1] = x_next.reshape(
                    [curr_paths.shape[0], curr_paths.shape[2]])
            # weight update
            p_weights = p_weight_func(U, grad_U, x_curr.T, p_weights)

        # add paths
        all_paths.append(curr_paths)
        end_points = curr_paths[:, -1]
        # resample particles
        new_starting = list(p_resample_func(p_weights, end_points))
        curr_paths = np.array([[p] for p in new_starting])
        p_weights = np.zeros(len(p_start))

    return np.array(all_paths)
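The inner update above has the form of an overdamped Langevin (Euler-Maruyama) step. A minimal standalone sketch of just that step, under assumed settings: the 1-D double-well potential and the fixed gamma/temperature values below are illustrative and do not come from the original process dictionary.

import numpy as np

# Illustrative potential U(x) = (x^2 - 1)^2 and its gradient.
U = lambda x: (x**2 - 1.0)**2
grad_U = lambda x: 4.0 * x * (x**2 - 1.0)

gamma, temperature = 0.01, 0.05
x = np.random.uniform(-2.0, 2.0, size=(100, 1))  # 100 particles in 1-D

for _ in range(1000):
    # same form as the diffusion step above: x <- x - gamma*grad_U(x) + temperature*noise
    x = x + gamma * (-grad_U(x)) + temperature * np.random.normal(size=x.shape)

print(np.mean(np.abs(np.abs(x) - 1.0)))  # particles should cluster near the wells at x = +/-1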