def vjp_all(g):
    vjp_y = g[-1, :]
    vjp_t0 = 0
    time_vjp_list = []
    vjp_args = np.zeros(np.size(flat_args))

    for i in range(T - 1, 0, -1):
        # Compute effect of moving measurement time.
        vjp_cur_t = np.dot(func(yt[i, :], t[i], *func_args), g[i, :])
        time_vjp_list.append(vjp_cur_t)
        vjp_t0 = vjp_t0 - vjp_cur_t

        # Run augmented system backwards to the previous observation.
        aug_y0 = np.hstack((yt[i, :], vjp_y, vjp_t0, vjp_args))
        aug_ans = odeint(augmented_dynamics, aug_y0,
                         np.array([t[i], t[i - 1]]), tuple((flat_args,)), **kwargs)
        _, vjp_y, vjp_t0, vjp_args = unpack(aug_ans[1])

        # Add gradient from current output.
        vjp_y = vjp_y + g[i - 1, :]

    time_vjp_list.append(vjp_t0)
    vjp_times = np.hstack(time_vjp_list)[::-1]

    return None, vjp_y, vjp_times, unflatten(vjp_args)
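# Minimal usage sketch for the adjoint VJP above. vjp_all matches the closure
# that autograd's autograd.scipy.integrate module returns as the gradient of
# its odeint primitive; once registered, grad() can differentiate through an
# ODE solve. The toy dynamics and loss below are assumptions for illustration.
import autograd.numpy as np
from autograd import grad
from autograd.scipy.integrate import odeint

def dynamics(y, t, theta):
    # simple linear decay, dy/dt = -theta * y
    return -theta * y

def ode_loss(theta):
    t = np.linspace(0., 1., 10)
    yt = odeint(dynamics, np.array([1.0]), t, tuple((theta,)))
    return np.sum(yt**2)

# gradient w.r.t. the dynamics parameter flows through the adjoint system
print(grad(ode_loss)(np.array([0.5])))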
def auto_diff(self, params, unknowns, resids):
    plist = params.keys()
    ulist = unknowns.keys()

    def wrap(inputs):
        _unknowns = {i: unknowns[i] for i in unknowns}
        _resids = {}
        _params = dict(zip(plist, inputs))
        self.solve_nonlinear(_params, _unknowns, resids)
        return [_unknowns[unk] for unk in ulist]

    gradfunc = jacobian(wrap)
    inp = [params[val] for val in plist]
    df = gradfunc(inp)

    J = {}
    for i, unk in enumerate(ulist):
        for k, inp in enumerate(plist):
            gshape = (np.size(unknowns[unk]), np.size(params[inp]))
            J[unk, inp] = np.reshape(df[i][k], gshape, order="C")
    return J
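# Standalone toy version of the pattern above (not OpenMDAO itself): take
# jacobian() blocks of a solver wrapper, one per (unknown, param) pair, and
# store them in a flat dict. solve() and its variable names are invented here
# purely for illustration.
import autograd.numpy as np
from autograd import jacobian

def solve(x1, x2):
    return {'y1': 2.0 * x1, 'y2': x1 + x2}

demo_params = {'x1': np.array([1.0]), 'x2': np.array([3.0])}
J_demo = {}
for unk in ['y1', 'y2']:
    for inp in ['x1', 'x2']:
        f = lambda v, unk=unk, inp=inp: solve(**{**demo_params, inp: v})[unk]
        J_demo[unk, inp] = jacobian(f)(demo_params[inp])
print(J_demo['y2', 'x1'])  # d y2 / d x1 -> [[1.]]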
def region_coloring(self, region, ax):
    #### color first regions ####
    # generate input range for functions
    minx = min(min(self.x[:, 0]), min(self.x[:, 1]))
    maxx = max(max(self.x[:, 0]), max(self.x[:, 1]))
    gapx = (maxx - minx) * 0.1
    minx -= gapx
    maxx += gapx

    # plot over range
    r = np.linspace(minx, maxx, 200)
    x1_vals, x2_vals = np.meshgrid(r, r)
    x1_vals.shape = (len(r)**2, 1)
    x2_vals.shape = (len(r)**2, 1)
    o = np.ones((len(r)**2, 1))
    x = np.concatenate([o, x1_vals, x2_vals], axis=1)

    ### for region 1, determine points that are uniquely positive for each classifier ###
    ind_set = []
    y = np.dot(self.W, x.T)
    num_classes = np.size(np.unique(self.y))

    if region == 1 or region == 'all':
        for i in range(0, num_classes):
            class_inds = np.arange(num_classes)
            class_inds = np.delete(class_inds, (i), axis=0)

            # loop over non-current classifiers, keep points they all label negative
            ind = np.argwhere(y[class_inds[0]] < 0).tolist()
            ind = [s[0] for s in ind]
            for j in range(1, len(class_inds)):
                c_ind = class_inds[j]
                ind2 = np.argwhere(y[c_ind] < 0).tolist()
                ind2 = [s[0] for s in ind2]
                ind = [s for s in ind if s in ind2]

            # intersect with points the current classifier labels positive
            ind2 = np.argwhere(y[i] > 0).tolist()
            ind2 = [s[0] for s in ind2]
            ind = [s for s in ind if s in ind2]

            # plot polygon over region defined by ind
            x1_ins = np.asarray([x1_vals[s] for s in ind])
            x1_ins.shape = (len(x1_ins), 1)
            x2_ins = np.asarray([x2_vals[s] for s in ind])
            x2_ins.shape = (len(x2_ins), 1)
            h = np.concatenate((x1_ins, x2_ins), axis=1)
            vertices = ConvexHull(h).vertices
            poly = [h[v] for v in vertices]
            polygon = Polygon(poly, True)
            patches = []
            patches.append(polygon)
            p = PatchCollection(patches, alpha=0.2, color=self.colors[i])
            ax.add_collection(p)

    if region == 2 or region == 'all':
        for i in range(0, num_classes):
            class_inds = np.arange(num_classes)
            class_inds = np.delete(class_inds, (i), axis=0)

            # loop over non-current classifiers, keep points they all label positive
            ind = np.argwhere(y[class_inds[0]] > 0).tolist()
            ind = [s[0] for s in ind]
            for j in range(1, len(class_inds)):
                c_ind = class_inds[j]
                ind2 = np.argwhere(y[c_ind] > 0).tolist()
                ind2 = [s[0] for s in ind2]
                ind = [s for s in ind if s in ind2]

            # intersect with points the current classifier labels negative
            ind2 = np.argwhere(y[i] < 0).tolist()
            ind2 = [s[0] for s in ind2]
            ind = [s for s in ind if s in ind2]

            # collect points in region defined by ind
            x1_ins = np.asarray([x1_vals[s] for s in ind])
            x1_ins.shape = (len(x1_ins), 1)
            x2_ins = np.asarray([x2_vals[s] for s in ind])
            x2_ins.shape = (len(x2_ins), 1)
            o = np.ones((len(x2_ins), 1))
            h = np.concatenate((o, x1_ins, x2_ins), axis=1)

            # determine regions dominated by one classifier or the other
            vals = []
            for c in class_inds:
                w = self.W[int(c)]
                nv = np.dot(w, h.T)
                vals.append(nv)
            vals = np.asarray(vals)
            vals.shape = (len(class_inds), len(h))
            ind = np.argmax(vals, axis=0)

            for j in range(len(class_inds)):
                # make polygon for each subregion
                ind1 = np.argwhere(ind == j)
                x1_ins2 = np.asarray([x1_ins[s] for s in ind1])
                x1_ins2.shape = (len(x1_ins2), 1)
                x2_ins2 = np.asarray([x2_ins[s] for s in ind1])
                x2_ins2.shape = (len(x2_ins2), 1)
                h = np.concatenate((x1_ins2, x2_ins2), axis=1)

                # find convex hull of points
                vertices = ConvexHull(h).vertices
                poly = [h[v] for v in vertices]
                polygon = Polygon(poly, True)
                patches = []
                patches.append(polygon)
                c = class_inds[j]
                p = PatchCollection(patches, alpha=0.2, color=self.colors[c])
                ax.add_collection(p)

    if region == 3 or region == 'all':
        # find negative zone of all classifiers
        ind = np.argwhere(y[0] < 0).tolist()
        ind = [s[0] for s in ind]
        for i in range(1, num_classes):
            ind2 = np.argwhere(y[i] < 0).tolist()
            ind2 = [s[0] for s in ind2]
            ind = [s for s in ind if s in ind2]

        # loop over negative zone, find max area of each classifier
        x1_ins = np.asarray([x1_vals[s] for s in ind])
        x1_ins.shape = (len(x1_ins), 1)
        x2_ins = np.asarray([x2_vals[s] for s in ind])
        x2_ins.shape = (len(x2_ins), 1)
        o = np.ones((len(x2_ins), 1))
        h = np.concatenate((o, x1_ins, x2_ins), axis=1)

        # determine regions dominated by one classifier or the other
        vals = []
        for c in range(num_classes):
            w = self.W[c]
            nv = np.dot(w, h.T)
            vals.append(nv)
        vals = np.asarray(vals)
        vals.shape = (num_classes, len(h))
        ind = np.argmax(vals, axis=0)

        # loop over each class, construct polygon region for each
        for c in range(num_classes):
            # make polygon for each subregion
            ind1 = np.argwhere(ind == c)
            x1_ins2 = np.asarray([x1_ins[s] for s in ind1])
            x1_ins2.shape = (len(x1_ins2), 1)
            x2_ins2 = np.asarray([x2_ins[s] for s in ind1])
            x2_ins2.shape = (len(x2_ins2), 1)
            h = np.concatenate((x1_ins2, x2_ins2), axis=1)

            # find convex hull of points
            vertices = ConvexHull(h).vertices
            poly = [h[v] for v in vertices]
            polygon = Polygon(poly, True)
            patches = []
            patches.append(polygon)
            p = PatchCollection(patches, alpha=0.2, color=self.colors[c])
            ax.add_collection(p)
def fixed_points(rnn, inp, num_points=1, eps=0.01, opt_iters=10000, thresh=1,
                 max_tries=100, rand_init=1, init_scale=5, plot_loss=0):
    '''This function uses the trained parameters to find num_points fixed points.
    It does a gradient descent to minimize q(x), which is analogous to the
    energy of the system.
    To just plot the gradient descent loss and step size for finding a single
    fixed point, set the plot_loss flag to 1.

    Inputs:
        rnn: Should be a JazNet class object.
        inp: A fixed value for the input(s). Can just be a list (e.g. [1,0])
        num_points: Number of points to find (if plot_loss=0)
        eps: Epsilon value that scales the step size
        opt_iters: How many iterations to run to try to converge on a fixed point
        thresh: Threshold for the norm of the network activity before calling
            it a fixed point
        rand_init: Randomly pick a starting point if 1 (default), otherwise go
            with the network's current activity.
        plot_loss: Will result in only finding one fixed point. Shows how loss
            function/step size changes. Default 0

    Outputs:
        all_points: Gives activity for all fixed points found in a
            num_points-by-N array
        fp_outputs: Network output at each fixed point.
            Note: Should change this depending on whether network uses tanh of
            activities for outputs, or if it has biases.
        trajectories: List with num_points elements, where each element is a
            TxN array, where T is the number of steps it took to find the
            fixed point and N is the number of neurons.
    '''
    def output(x):
        return np.dot(np.tanh(x), rnn_par['out_weights'])

    def F(x):
        return (-x + np.dot(np.tanh(x), rnn_par['rec_weights'])
                + np.dot(inp, rnn_par['inp_weights']) + rnn_par['bias'])

    def q(x):
        return 1 / 2 * np.linalg.norm(F(x))**2

    def find_point(inp, opt_iters, eps):
        loss = []
        stepsize = []
        x_traj = []
        if rand_init:
            # The randomized initial activity needs to be big enough to relax
            # to interesting points
            x = np.random.randn(rnn.act.size) * init_scale
        else:
            x = np.squeeze(rnn.act)
        for i in range(opt_iters):
            loss.append(q(x))
            if loss[i] < thresh:
                break
            step = eps * loss_grad(x)
            stepsize.append(np.linalg.norm(step))
            x = x - step
            x_traj.append(x)
        return x, loss, stepsize, x_traj

    start = time.time()
    rnn_par = rnn.rnn_par  # Extract the parameters
    loss_grad = grad(q)

    if plot_loss:  # To see the optimization process to find one fixed point
        x, loss, stepsize, x_traj = find_point(inp, opt_iters, eps)
        plt.figure()
        plt.subplot(1, 3, 1)
        plt.plot(loss[-100:-1])
        plt.title('Loss, last 100')
        plt.subplot(1, 3, 2)
        plt.plot(loss)
        plt.xlabel('Iteration')
        plt.title('Loss, all')
        plt.subplot(1, 3, 3)
        plt.plot(stepsize)
        plt.xlabel('Iteration')
        plt.title('Step size')
        plt.show()
        print('Last loss:', loss[-1])
    else:  # For finding a bunch of fixed points
        all_points = np.zeros((num_points, np.size(rnn.act)))
        fp_outputs = np.zeros((num_points, rnn_par['out_weights'].shape[1]))
        trajectories = []
        for p in range(num_points):
            endloss = 1000  # Some big value above the threshold
            tries = 0
            while endloss > thresh:
                if tries < max_tries:
                    x, loss, stepsize, x_traj = find_point(inp, opt_iters, eps)
                    endloss = loss[-1]
                    tries += 1
                else:
                    print('Unsuccessful run; error=%g' % endloss)
                    raise TimeoutError('No fixed points found in %d tries' % max_tries)
            all_points[p, :] = x
            fp_outputs[p] = output(x)
            trajectories.append(np.array(x_traj))
            print('.', end="")
        finish = time.time()
        print('Done with fixed points in %d seconds' % (finish - start))
        return all_points, fp_outputs, trajectories
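# Hedged usage sketch for fixed_points. JazNetStub is a hypothetical stand-in
# for the JazNet object mentioned in the docstring; weight shapes follow the
# np.dot(np.tanh(x), W) convention used above. Assumes this module's own
# imports (autograd.numpy as np, autograd's grad, time, matplotlib) are in
# scope; thresh/eps may need tuning for a given network.
class JazNetStub:
    def __init__(self, N=10, I=2, O=1, seed=0):
        rng = np.random.RandomState(seed)
        self.act = np.zeros(N)
        self.rnn_par = {'rec_weights': 0.9 * rng.randn(N, N) / np.sqrt(N),
                        'inp_weights': rng.randn(I, N),
                        'out_weights': rng.randn(N, O),
                        'bias': np.zeros(N)}

rnn_stub = JazNetStub()
all_points, fp_outputs, trajectories = fixed_points(rnn_stub, inp=[0.0, 0.0],
                                                    num_points=2)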
def relu(self, w):
    # ReLU / perceptron cost: penalize only violated margins -y * model(x, w)
    cost = np.sum(np.maximum(0, -self.y * self.model(self.x, w)))
    return cost / float(np.size(self.y))
def least_absolute_deviations(self, w):
    # mean absolute error between model predictions and targets
    cost = np.sum(np.abs(self.model(self.x, w) - self.y))
    return cost / float(np.size(self.y))
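# Quick check of the two costs above, assuming the usual conventions of this
# codebase: self.x holds inputs, self.y labels in {-1,+1} (or real targets for
# the absolute-deviation cost), and self.model(x, w) is a predictor. The
# linear model below is our assumption, used only to exercise the functions.
import autograd.numpy as np
from autograd import grad

class CostDemo:
    def __init__(self, x, y):
        self.x, self.y = x, y
    def model(self, x, w):
        return w[0] + w[1] * x

demo = CostDemo(np.array([-1.0, 0.5, 2.0]), np.array([-1.0, 1.0, 1.0]))
w_demo = np.array([0.1, 0.2])
print(relu(demo, w_demo))                                          # average perceptron cost
print(grad(lambda w: least_absolute_deviations(demo, w))(w_demo))  # autograd gradient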
def NN_diffusion(nx, nt, iterations, num_hidden_neurons, learning_rate):
    tf.reset_default_graph()

    # set a seed to get the same results from every run
    tf.set_random_seed(4155)

    x_ = np.linspace(0, 1, nx)
    t_ = np.linspace(0, 1, nt)
    X, T = np.meshgrid(x_, t_)
    x = X.ravel()
    t = T.ravel()

    # Construct neural network
    zeros = tf.reshape(tf.convert_to_tensor(np.zeros(x.shape)), shape=(-1, 1))
    x = tf.reshape(tf.convert_to_tensor(x), shape=(-1, 1))
    t = tf.reshape(tf.convert_to_tensor(t), shape=(-1, 1))
    total_points = tf.concat([x, t], 1)  # input layer

    # number of hidden layers
    num_hidden_layers = len(num_hidden_neurons)
    #print('hidden layers:', num_hidden_layers)

    X = tf.convert_to_tensor(X)
    T = tf.convert_to_tensor(T)

    # construct the network: layer structure
    with tf.name_scope('dnn'):
        num_hidden_layers = np.size(num_hidden_neurons)
        previous_layer = total_points
        for l in range(num_hidden_layers):
            current_layer = tf.layers.dense(previous_layer,
                                            num_hidden_neurons[l],
                                            name=('hidden{}'.format(l + 1)),
                                            activation=tf.nn.sigmoid)
            previous_layer = current_layer
        dnn_output = tf.layers.dense(previous_layer, 1, name='output',
                                     activation=None)

    # Define the cost function
    with tf.name_scope('loss'):
        g_trial = (1 - t) * u(x) + x * (1 - x) * t * dnn_output
        g_trial_d2x = tf.gradients(tf.gradients(g_trial, x), x)
        g_trial_dt = tf.gradients(g_trial, t)
        loss = tf.losses.mean_squared_error(zeros, g_trial_dt[0] - g_trial_d2x[0])

    # Define optimizer
    with tf.name_scope('train'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        training_op = optimizer.minimize(loss)

    # Define a node that initializes all of the other nodes in the computational graph
    init = tf.global_variables_initializer()

    #g_analytic = tf.sin(np.pi*x)*tf.exp(-np.pi*np.pi*t)
    g_analytic = u_analytic(x, t)
    g_dnn = None

    start = time.time()

    # The execution phase
    with tf.Session() as sess:
        # initialize all graph variables
        init.run()

        # training of the network
        for i in range(iterations):
            sess.run(training_op)

        # store the results
        g_analytic = g_analytic.eval()  # analytic solution
        g_dnn = g_trial.eval()          # neural network solution
        cost = loss.eval()              # cost evaluation

    stop = time.time()
    print('time duration:', stop - start)

    """
    #compare with analytical solution
    diff = np.abs(g_analytic - g_dnn)
    max_diff = np.max(diff)
    print('max absolute difference between the analytical and the tensorflow: ', max_diff)
    """

    # statistical computations
    r2 = r2_score(g_analytic, g_dnn)
    mse = mean_squared_error(g_analytic, g_dnn)
    print('R2:', r2)
    print('MSE:', mse)

    G_analytic = g_analytic.reshape((nt, nx))
    G_dnn = g_dnn.reshape((nt, nx))

    # compare with analytical solution
    diff = np.abs(G_analytic - G_dnn)
    max_diff = np.max(diff)
    print('max absolute difference between the analytical and the tensorflow: ', max_diff)

    X, T = np.meshgrid(x_, t_)

    fig = plt.figure()
    ax = fig.gca(projection='3d')
    ax.set_title('Solution from the deep neural network with %d layers \n'
                 'and 50 neurons within hidden layer' % len(num_hidden_neurons))
    s = ax.plot_surface(X, T, G_dnn, linewidth=0, antialiased=False, cmap=cm.viridis)
    ax.set_xlabel('Time $t$')
    ax.set_ylabel('Position $x$')
    #plt.savefig('solution_deep_nn_new.png')

    fig = plt.figure()
    ax = fig.gca(projection='3d')
    ax.set_title('Analytical solution of diffusion equation with 4 hidden layers \n'
                 'and 50 neurons within hidden layer')
    s = ax.plot_surface(X, T, G_analytic, linewidth=0, antialiased=False, cmap=cm.viridis)
    ax.set_xlabel('Time $t$')
    ax.set_ylabel('Position $x$')
    #plt.savefig('analytical_solution_nn_new.png')

    fig = plt.figure()
    ax = fig.gca(projection='3d')
    ax.set_title('Difference between the numerical and analytical solution, \n'
                 'with 4 hidden layers and 50 neurons within hidden layer')
    s = ax.plot_surface(X, T, diff, linewidth=0, antialiased=False, cmap=cm.viridis)
    ax.set_xlabel('Time $t$')
    ax.set_ylabel('Position $x$')
    #plt.savefig('difference_nn_new.png')

    plt.show()

    """
    # Take some slices of the 3D plots just to see the solutions at particular times
    indx1 = 0
    indx2 = int(nt/2)
    indx3 = nt-1

    t1 = t_[indx1]
    t2 = t_[indx2]
    t3 = t_[indx3]

    # Slice the results from the DNN
    res1 = g_dnn[:,indx1]
    res2 = g_dnn[:,indx2]
    res3 = g_dnn[:,indx3]

    # Slice the analytical results
    res_analytical1 = G_analytical[:,indx1]
    res_analytical2 = G_analytical[:,indx2]
    res_analytical3 = G_analytical[:,indx3]

    # Plot the slices
    plt.figure()
    plt.title("Computed solutions at time = %g"%t1)
    plt.plot(x_, res1)
    plt.plot(x_,res_analytical1)
    plt.legend(['dnn','analytical'])
    plt.savefig('computed_solution_nn_t1.png')

    plt.figure()
    plt.title("Computed solutions at time = %g"%t2)
    plt.plot(x_, res2)
    plt.plot(x_,res_analytical2)
    plt.legend(['dnn','analytical'])
    plt.savefig('computed_solution_nn_t2.png')

    plt.figure()
    plt.title("Computed solutions at time = %g"%t3)
    plt.plot(x_, res3)
    plt.plot(x_,res_analytical3)
    plt.legend(['dnn','analytical'])
    plt.savefig('computed_solution_nn_t3.png')

    plt.show()
    """

    return diff
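# Hypothetical driver for NN_diffusion. The function references module-level
# helpers u(x) (initial condition) and u_analytic(x, t) (exact solution of
# u_t = u_xx with zero boundary values); the definitions below follow the
# commented-out analytic line inside the function but are our assumption, not
# taken from the original module. Requires TensorFlow 1.x APIs
# (tf.reset_default_graph, tf.layers, tf.Session).
def u(x):
    return tf.sin(np.pi * x)

def u_analytic(x, t):
    return tf.sin(np.pi * x) * tf.exp(-np.pi * np.pi * t)

diff = NN_diffusion(nx=10, nt=10, iterations=1000,
                    num_hidden_neurons=[50, 50, 50, 50], learning_rate=1e-2)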
def plot_fit_and_feature_space(self, w, model, feat, **kwargs):
    # construct figure
    fig, axs = plt.subplots(1, 3, figsize=(9, 4))

    # create subplot with 2 panels
    gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1])
    ax1 = plt.subplot(gs[0])
    ax2 = plt.subplot(gs[1])

    view = [20, 20]
    if 'view' in kwargs:
        view = kwargs['view']

    ##### plot left panel in original space ####
    # scatter points
    xmin, xmax, ymin, ymax = self.scatter_pts_2d(self.x, ax1)

    # clean up panel
    ax1.set_xlim([xmin, xmax])
    ax1.set_ylim([ymin, ymax])

    # label axes
    ax1.set_xlabel(r'$x$', fontsize=16)
    ax1.set_ylabel(r'$y$', rotation=0, fontsize=16, labelpad=10)

    # create fit
    s = np.linspace(xmin, xmax, 300)[np.newaxis, :]
    normalizer = lambda a: a
    if 'normalizer' in kwargs:
        normalizer = kwargs['normalizer']
    t = model(normalizer(s), w)
    ax1.plot(s.flatten(), t.flatten(), linewidth=4, c='k', zorder=0)
    ax1.plot(s.flatten(), t.flatten(), linewidth=2, c='lime', zorder=0)

    #### plot fit in transformed feature space #####
    # check if feature transform has internal parameters
    x_transformed = 0
    sig = signature(feat)
    if len(sig.parameters) == 2:
        if np.shape(w)[1] == 1:
            x_transformed = feat(normalizer(self.x), w)
        else:
            x_transformed = feat(normalizer(self.x), w[0])
    else:
        x_transformed = feat(normalizer(self.x))

    # two dimensional transformed feature space
    if x_transformed.shape[0] == 1:
        s = np.linspace(xmin, xmax, 300)[np.newaxis, :]

        # scatter points
        xmin, xmax, ymin, ymax = self.scatter_pts_2d(x_transformed, ax2)

        # produce plot
        s2 = copy.deepcopy(s)
        if len(sig.parameters) == 2:
            if np.shape(w)[1] == 1:
                s2 = feat(normalizer(s), w)
            else:
                s2 = feat(normalizer(s), w[0])
        else:
            s2 = feat(normalizer(s))
        t = model(normalizer(s), w)
        ax2.plot(s2.flatten(), t.flatten(), linewidth=4, c='k', zorder=0)
        ax2.plot(s2.flatten(), t.flatten(), linewidth=2, c='lime', zorder=0)

        # label axes
        ax2.set_xlabel(r'$f\left(x,\mathbf{w}^{\star}\right)$', fontsize=16)
        ax2.set_ylabel(r'$y$', rotation=0, fontsize=16, labelpad=10)

    # three dimensional transformed feature space
    if x_transformed.shape[0] == 2:
        # create panel
        ax2 = plt.subplot(gs[1], projection='3d')
        s = np.linspace(xmin, xmax, 100)[np.newaxis, :]

        # plot data in 3d
        xmin, xmax, xmin1, xmax1, ymin, ymax = self.scatter_3d_points(x_transformed, ax2)

        # create and plot fit
        s2 = copy.deepcopy(s)
        if len(sig.parameters) == 2:
            s2 = feat(normalizer(s), w[0])
        else:
            s2 = feat(normalizer(s))

        # reshape for plotting
        a = s2[0, :]
        b = s2[1, :]
        a = np.linspace(xmin, xmax, 100)
        b = np.linspace(xmin1, xmax1, 100)
        a, b = np.meshgrid(a, b)

        # get first feature
        a.shape = (1, np.size(s)**2)
        f1 = feat(normalizer(a))[0, :]

        # get second feature
        b.shape = (1, np.size(s)**2)
        f2 = feat(normalizer(b))[1, :]

        # tack a 1 onto the top of each input point all at once
        c = np.vstack((a, b))
        o = np.ones((1, np.shape(c)[1]))
        c = np.vstack((o, c))
        r = np.dot(c.T, w)

        # reshape for surface plotting
        a.shape = (np.size(s), np.size(s))
        b.shape = (np.size(s), np.size(s))
        r.shape = (np.size(s), np.size(s))
        ax2.plot_surface(a, b, r, alpha=0.1, color='lime', rstride=15,
                         cstride=15, linewidth=0.5, edgecolor='k')
        ax2.set_xlim([np.min(a), np.max(a)])
        ax2.set_ylim([np.min(b), np.max(b)])

        '''
        a,b = np.meshgrid(t1,t2)
        a.shape = (1,np.size(s)**2)
        b.shape = (1,np.size(s)**2)
        '''
        '''
        c = np.vstack((a,b))
        o = np.ones((1,np.shape(c)[1]))
        c = np.vstack((o,c))

        # tack a 1 onto the top of each input point all at once
        r = (np.dot(c.T,w))

        a.shape = (np.size(s),np.size(s))
        b.shape = (np.size(s),np.size(s))
        r.shape = (np.size(s),np.size(s))
        ax2.plot_surface(a,b,r,alpha = 0.1,color = 'lime',rstride=15,
                         cstride=15,linewidth=0.5,edgecolor = 'k')
        '''

        # label axes
        #self.move_axis_left(ax2)
        ax2.set_xlabel(r'$f_1(x)$', fontsize=12, labelpad=5)
        ax2.set_ylabel(r'$f_2(x)$', rotation=0, fontsize=12, labelpad=5)
        ax2.set_zlabel(r'$y$', rotation=0, fontsize=12, labelpad=0)
        self.move_axis_left(ax2)
        ax2.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))
        ax2.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
        ax2.view_init(view[0], view[1])
def plot_cost_histories(self, histories, start, **kwargs):
    # plotting colors
    colors = ['k', 'magenta', 'aqua', 'blueviolet', 'chocolate']

    # initialize figure
    fig = plt.figure(figsize=(10, 3))

    # create subplot with 1 panel
    gs = gridspec.GridSpec(1, 1)
    ax = plt.subplot(gs[0])

    # any labels to add?
    labels = [' ', ' ']
    if 'labels' in kwargs:
        labels = kwargs['labels']

    # plot points on cost function plot too?
    points = False
    if 'points' in kwargs:
        points = kwargs['points']

    # run through input histories, plotting each beginning at 'start' iteration
    for c in range(len(histories)):
        history = histories[c]
        label = 0
        if c == 0:
            label = labels[0]
        else:
            label = labels[1]

        # check if a label exists, if so add it to the plot
        if np.size(label) == 0:
            ax.plot(np.arange(start, len(history), 1), history[start:],
                    linewidth=3 * (0.8)**(c), color=colors[c])
        else:
            ax.plot(np.arange(start, len(history), 1), history[start:],
                    linewidth=3 * (0.8)**(c), color=colors[c], label=label)

        # check if points should be plotted for visualization purposes
        if points == True:
            ax.scatter(np.arange(start, len(history), 1), history[start:],
                       s=90, color=colors[c], edgecolor='w', linewidth=2,
                       zorder=3)

    # clean up panel
    xlabel = 'step $k$'
    if 'xlabel' in kwargs:
        xlabel = kwargs['xlabel']
    ylabel = r'$g\left(\mathbf{w}^k\right)$'
    if 'ylabel' in kwargs:
        ylabel = kwargs['ylabel']
    ax.set_xlabel(xlabel, fontsize=14)
    ax.set_ylabel(ylabel, fontsize=14, rotation=0, labelpad=25)
    if np.size(label) > 0:
        anchor = (1, 1)
        if 'anchor' in kwargs:
            anchor = kwargs['anchor']
        plt.legend(loc='upper right', bbox_to_anchor=anchor)
        #leg = ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1), borderaxespad=0)
    ax.set_xlim([start - 0.5, len(history) - 0.5])

    # fig.tight_layout()
    plt.show()
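# Usage sketch for plot_cost_histories: the method never touches self, so it
# can be exercised with None in its place. The histories below are fabricated
# decaying cost curves, purely for illustration.
import numpy as np
fake_histories = [list(10.0 * np.exp(-0.05 * np.arange(100))),
                  list(12.0 * np.exp(-0.08 * np.arange(100)))]
plot_cost_histories(None, fake_histories, start=0, labels=['run 1', 'run 2'])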
def softmax(w, x, y, beta):
    # compute cost over batch
    cost = np.sum(beta * np.log(1 + np.exp(-y * model(x, w))))
    return cost / float(np.size(y))
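# Sketch of using the weighted softmax cost above with autograd. The
# module-level model(x, w) it calls is assumed here to be a linear predictor;
# beta carries per-point weights (e.g. for class balancing). All names below
# are illustrative assumptions.
import autograd.numpy as np
from autograd import grad

def model(x, w):
    return w[0] + np.dot(w[1:], x)

x_demo = np.random.randn(2, 20)                        # 2 features, 20 points
y_demo = np.where(np.random.randn(20) > 0, 1.0, -1.0)  # labels in {-1,+1}
beta_demo = np.ones(20)                                # uniform point weights
w0 = 0.1 * np.random.randn(3)
print(softmax(w0, x_demo, y_demo, beta_demo))
print(grad(softmax)(w0, x_demo, y_demo, beta_demo))    # gradient w.r.t. w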
def d_ll(x, num_peds, ess, robot_mu_x, robot_mu_y, ped_mu_x, ped_mu_y,
         cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y,
         cov_ped_x, cov_ped_y, inv_cov_ped_x, inv_cov_ped_y,
         one_over_cov_sum_x, one_over_cov_sum_y, normalize):
    T = np.size(robot_mu_x)

    d_alpha = [0. for _ in range(2 * T * np.int(np.round(ess + 1)))]
    d_beta = [0. for _ in range(2 * T * np.int(np.round(ess + 1)))]
    d_llambda = np.asarray([0. for _ in range(2 * T * np.int(np.round(ess + 1)))])

    n = 2
    for ped in range(ess):
        # if normalize == True:
        #     normalize_x = np.multiply(np.power(2*np.pi,-0.5), one_over_std_sum_x[ped])
        #     normalize_y = np.multiply(np.power(2*np.pi,-0.5), one_over_std_sum_y[ped])
        # else:
        normalize_x = 1.
        normalize_y = 1.

        vel_robot_x = np.tile(x[:T], (T, 1)).T - np.tile(x[n * T:(n + 1) * T], (T, 1))
        vel_robot_y = np.tile(x[T:2 * T], (T, 1)).T - np.tile(x[(n + 1) * T:(n + 2) * T], (T, 1))
        n = n + 2

        vel_robot_x_2 = np.power(vel_robot_x, 2)
        vel_robot_y_2 = np.power(vel_robot_y, 2)

        quad_robot_x = np.multiply(one_over_cov_sum_x[ped], vel_robot_x_2)
        quad_robot_y = np.multiply(one_over_cov_sum_y[ped], vel_robot_y_2)

        Z_x = np.multiply(normalize_x, np.exp(-0.5 * quad_robot_x))
        Z_y = np.multiply(normalize_y, np.exp(-0.5 * quad_robot_y))
        Z = np.multiply(Z_x, Z_y)

        X = np.divide(Z, 1. - Z)

        alpha_x = np.multiply(X, np.multiply(vel_robot_x, one_over_cov_sum_x[ped]))
        alpha_y = np.multiply(X, np.multiply(vel_robot_y, one_over_cov_sum_y[ped]))

        # X AND Y COMPONENT OF ROBOT DERIVATIVE
        d_alpha[:T] = np.add(d_alpha[:T], np.sum(alpha_x, axis=1))
        d_alpha[T:2 * T] = np.add(d_alpha[T:2 * T], np.sum(alpha_y, axis=1))

    d_beta[:T] = -np.dot(x[:T] - robot_mu_x, inv_cov_robot_x)
    d_beta[T:2 * T] = -np.dot(x[T:2 * T] - robot_mu_y, inv_cov_robot_y)

    d_llambda[0:2 * T] = np.add(d_alpha[0:2 * T], d_beta[0:2 * T])

    # X AND Y COMPONENT OF PED DERIVATIVE
    n = 2
    for ped in range(ess):
        # if normalize == True:
        #     normalize_x = np.multiply(np.power(2*np.pi,-0.5), one_over_std_sum_x[ped])
        #     normalize_y = np.multiply(np.power(2*np.pi,-0.5), one_over_std_sum_y[ped])
        # else:
        normalize_x = 1.
        normalize_y = 1.

        vel_ped_x = np.tile(x[:T], (T, 1)) - np.tile(x[n * T:(n + 1) * T], (T, 1)).T
        vel_ped_y = np.tile(x[T:2 * T], (T, 1)) - np.tile(x[(n + 1) * T:(n + 2) * T], (T, 1)).T

        vel_ped_x_2 = np.power(vel_ped_x, 2)
        vel_ped_y_2 = np.power(vel_ped_y, 2)

        quad_ped_x = np.multiply(one_over_cov_sum_x[ped], vel_ped_x_2)
        quad_ped_y = np.multiply(one_over_cov_sum_y[ped], vel_ped_y_2)

        Z_x = np.multiply(normalize_x, np.exp(-0.5 * quad_ped_x))
        Z_y = np.multiply(normalize_y, np.exp(-0.5 * quad_ped_y))
        Z = np.multiply(Z_x, Z_y)

        X = np.divide(Z, 1. - Z)

        alpha_x = np.multiply(X, np.multiply(vel_ped_x, one_over_cov_sum_x[ped]))
        alpha_y = np.multiply(X, np.multiply(vel_ped_y, one_over_cov_sum_y[ped]))

        d_alpha[n * T:(n + 1) * T] = -np.sum(alpha_x, axis=1)
        d_alpha[(n + 1) * T:(n + 2) * T] = -np.sum(alpha_y, axis=1)

        d_beta[n * T:(n + 1) * T] = -np.dot(x[n * T:(n + 1) * T] - ped_mu_x[ped],
                                            inv_cov_ped_x[ped])
        d_beta[(n + 1) * T:(n + 2) * T] = -np.dot(x[(n + 1) * T:(n + 2) * T] - ped_mu_y[ped],
                                                  inv_cov_ped_y[ped])
        n = n + 2

    d_llambda[2 * T:] = np.add(d_alpha[2 * T:], d_beta[2 * T:])

    return -1. * d_llambda
def dd_ll(x, num_peds, ess, robot_mu_x, robot_mu_y, ped_mu_x, ped_mu_y,
          cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y,
          cov_ped_x, cov_ped_y, inv_cov_ped_x, inv_cov_ped_y,
          one_over_cov_sum_x, one_over_cov_sum_y, normalize):
    T = np.size(robot_mu_x)
    H = np.zeros((2 * T * np.int(ess + 1), 2 * T * np.int(ess + 1)), float)

    sum_d_alpha = [0. for _ in range(2 * T * np.int(np.round(ess + 1)))]

    n = 2
    for ped in range(ess):
        # if normalize == True:
        #     normalize_x = np.multiply(np.power(2*np.pi,-0.5), one_over_std_sum_x[ped])
        #     normalize_y = np.multiply(np.power(2*np.pi,-0.5), one_over_std_sum_y[ped])
        # else:
        normalize_x = 1.
        normalize_y = 1.

        vel_robot_x = np.tile(x[:T], (T, 1)).T - np.tile(x[n * T:(n + 1) * T], (T, 1))
        vel_robot_y = np.tile(x[T:2 * T], (T, 1)).T - np.tile(x[(n + 1) * T:(n + 2) * T], (T, 1))

        vel_robot_x_2 = np.power(vel_robot_x, 2)
        vel_robot_y_2 = np.power(vel_robot_y, 2)
        vel_robot_x_y = np.multiply(vel_robot_x, vel_robot_y)

        one_over_cov_x_y = np.multiply(one_over_cov_sum_x[ped],
                                       one_over_cov_sum_y[ped])

        quad_robot_x = np.multiply(one_over_cov_sum_x[ped], vel_robot_x_2)
        quad_robot_y = np.multiply(one_over_cov_sum_y[ped], vel_robot_y_2)

        Z_x = np.multiply(normalize_x, np.exp(-0.5 * quad_robot_x))
        Z_y = np.multiply(normalize_y, np.exp(-0.5 * quad_robot_y))
        Z = np.multiply(Z_x, Z_y)

        X = np.divide(Z, 1. - Z)
        X_2 = np.power(X, 2)
        X_plus_X2 = np.add(X, X_2)

        d_alpha_x = np.multiply(X, one_over_cov_sum_x[ped])
        d_alpha_x = np.add(d_alpha_x, -np.multiply(X_plus_X2, np.power(
            np.multiply(vel_robot_x, one_over_cov_sum_x[ped]), 2)))
        d_alpha_y = np.multiply(X, one_over_cov_sum_y[ped])
        d_alpha_y = np.add(d_alpha_y, -np.multiply(X_plus_X2, np.power(
            np.multiply(vel_robot_y, one_over_cov_sum_y[ped]), 2)))

        sum_d_alpha[:T] = np.add(sum_d_alpha[:T], np.sum(d_alpha_x, axis=1))
        sum_d_alpha[T:2 * T] = np.add(sum_d_alpha[T:2 * T], np.sum(d_alpha_y, axis=1))

        d_off_alpha = -np.multiply(X_plus_X2, np.multiply(vel_robot_x_y,
                                                          one_over_cov_x_y))

        # OFF DIAGONALS
        H[:T, T:2 * T] = np.add(H[:T, T:2 * T], np.diag(np.sum(d_off_alpha, axis=1)))

        H[:T, n * T:(n + 1) * T] = -1. * d_alpha_x
        H[n * T:(n + 1) * T, :T] = H[:T, n * T:(n + 1) * T].T

        H[T:2 * T, (n + 1) * T:(n + 2) * T] = -1. * d_alpha_y
        H[(n + 1) * T:(n + 2) * T, T:2 * T] = H[T:2 * T, (n + 1) * T:(n + 2) * T].T

        H[T:2 * T, n * T:(n + 1) * T] = np.multiply(X_plus_X2, np.multiply(vel_robot_x_y,
                                                                           one_over_cov_x_y))
        H[n * T:(n + 1) * T, T:2 * T] = H[T:2 * T, n * T:(n + 1) * T].T

        H[:T, (n + 1) * T:(n + 2) * T] = np.multiply(X_plus_X2, np.multiply(vel_robot_x_y,
                                                                            one_over_cov_x_y))
        H[(n + 1) * T:(n + 2) * T, :T] = H[:T, (n + 1) * T:(n + 2) * T].T

        n = n + 2

    H[:T, :T] = np.add(np.diag(sum_d_alpha[:T]), -1. * inv_cov_robot_x)
    H[T:2 * T, T:2 * T] = np.add(np.diag(sum_d_alpha[T:2 * T]), -1. * inv_cov_robot_y)
    H[T:2 * T, :T] = H[:T, T:2 * T].T

    # PED DIAGONALS
    n = 2
    for ped in range(ess):
        # if normalize == True:
        #     normalize_x = np.multiply(np.power(2*np.pi,-0.5), one_over_std_sum_x[ped])
        #     normalize_y = np.multiply(np.power(2*np.pi,-0.5), one_over_std_sum_y[ped])
        # else:
        normalize_x = 1.
        normalize_y = 1.

        vel_ped_x = np.tile(x[:T], (T, 1)) - np.tile(x[n * T:(n + 1) * T], (T, 1)).T
        vel_ped_y = np.tile(x[T:2 * T], (T, 1)) - np.tile(x[(n + 1) * T:(n + 2) * T], (T, 1)).T

        vel_ped_x_2 = np.power(vel_ped_x, 2)
        vel_ped_y_2 = np.power(vel_ped_y, 2)
        vel_ped_x_y = np.multiply(vel_ped_x, vel_ped_y)

        one_over_cov_x_y = np.multiply(one_over_cov_sum_x[ped],
                                       one_over_cov_sum_y[ped])

        quad_ped_x = np.multiply(one_over_cov_sum_x[ped], vel_ped_x_2)
        quad_ped_y = np.multiply(one_over_cov_sum_y[ped], vel_ped_y_2)

        Z_x = np.multiply(normalize_x, np.exp(-0.5 * quad_ped_x))
        Z_y = np.multiply(normalize_y, np.exp(-0.5 * quad_ped_y))
        Z = np.multiply(Z_x, Z_y)

        X = np.divide(Z, 1. - Z)
        X_2 = np.power(X, 2)
        X_plus_X2 = np.add(X, X_2)

        d_alpha_x = np.multiply(X, one_over_cov_sum_x[ped])
        d_alpha_x = np.add(d_alpha_x, -np.multiply(X_plus_X2, np.power(
            np.multiply(vel_ped_x, one_over_cov_sum_x[ped]), 2)))
        d_alpha_y = np.multiply(X, one_over_cov_sum_y[ped])
        d_alpha_y = np.add(d_alpha_y, -np.multiply(X_plus_X2, np.power(
            np.multiply(vel_ped_y, one_over_cov_sum_y[ped]), 2)))

        H[n * T:(n + 1) * T, n * T:(n + 1) * T] = np.diag(np.sum(d_alpha_x, axis=1)) - \
            inv_cov_ped_x[ped]
        H[(n + 1) * T:(n + 2) * T, (n + 1) * T:(n + 2) * T] = np.diag(np.sum(d_alpha_y, axis=1)) - \
            inv_cov_ped_y[ped]

        H[n * T:(n + 1) * T, (n + 1) * T:(n + 2) * T] = -np.diag(np.sum(np.multiply(
            X_plus_X2, np.multiply(vel_ped_x_y, one_over_cov_x_y)), axis=1))
        H[(n + 1) * T:(n + 2) * T, n * T:(n + 1) * T] = H[n * T:(n + 1) * T,
                                                          (n + 1) * T:(n + 2) * T].T
        n = n + 2

    return -1. * H
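# Sketch of how d_ll / dd_ll are typically consumed: a damped Newton update
# on the negative log-likelihood they return. `args` is a hypothetical tuple
# bundling the many shared parameters (num_peds, ess, means, covariances,
# one_over_cov sums, normalize) in the order of the signatures above.
import numpy as np

def newton_step(x, args, damping=1e-3):
    g = d_ll(x, *args)   # gradient of the negative log-likelihood
    H = dd_ll(x, *args)  # Hessian of the negative log-likelihood
    # damping keeps the solve well-posed when H is near-singular
    dx = np.linalg.solve(H + damping * np.eye(H.shape[0]), g)
    return x - dx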
def plot_three_fits(self, run1, run2, run3, **kwargs):
    ## strip off model, normalizer, etc. ##
    model1 = run1.model
    model2 = run2.model
    model3 = run3.model

    normalizer1 = run1.normalizer
    normalizer2 = run2.normalizer
    normalizer3 = run3.normalizer

    # get weights with the lowest cost from each run
    cost_history1 = run1.cost_histories[0]
    ind1 = np.argmin(cost_history1)
    w1 = run1.weight_histories[0][ind1]

    cost_history2 = run2.cost_histories[0]
    ind2 = np.argmin(cost_history2)
    w2 = run2.weight_histories[0][ind2]

    cost_history3 = run3.cost_histories[0]
    ind3 = np.argmin(cost_history3)
    w3 = run3.weight_histories[0][ind3]

    # construct figure
    fig, axs = plt.subplots(1, 3, figsize=(10, 4))

    # create subplot with 3 panels
    gs = gridspec.GridSpec(1, 3)
    ax1 = plt.subplot(gs[0], aspect='equal')
    ax2 = plt.subplot(gs[1], aspect='equal')
    ax3 = plt.subplot(gs[2], aspect='equal')

    # loop over axes
    for ax in [ax1, ax2, ax3]:
        # label and format panel
        ax.set_xlabel(r'$x_1$', fontsize=15)
        ax.set_ylabel(r'$x_2$', fontsize=15, rotation=0, labelpad=20)
        ax.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))

        # plot points in 2d
        ind0 = np.argwhere(self.y == +1)
        ax.scatter(self.x[ind0, 0], self.x[ind0, 1], s=55,
                   color=self.colors[0], edgecolor='k')
        ind1 = np.argwhere(self.y == -1)
        ax.scatter(self.x[ind1, 0], self.x[ind1, 1], s=55,
                   color=self.colors[1], edgecolor='k')

        ### create surface and boundary plot ###
        xmin1 = np.min(self.x[:, 0])
        xmax1 = np.max(self.x[:, 0])
        xgap1 = (xmax1 - xmin1) * 0.05
        xmin1 -= xgap1
        xmax1 += xgap1

        xmin2 = np.min(self.x[:, 1])
        xmax2 = np.max(self.x[:, 1])
        xgap2 = (xmax2 - xmin2) * 0.05
        xmin2 -= xgap2
        xmax2 += xgap2

        # plot boundary for 2d plot
        r1 = np.linspace(xmin1, xmax1, 300)
        r2 = np.linspace(xmin2, xmax2, 300)
        s, t = np.meshgrid(r1, r2)
        s = np.reshape(s, (np.size(s), 1))
        t = np.reshape(t, (np.size(t), 1))
        h = np.concatenate((s, t), axis=1)

        # plot model
        z = 0
        if ax == ax1:
            z = model1(normalizer1(h.T), w1)
            ax.set_title('underfitting', fontsize=14)
        if ax == ax2:
            z = model2(normalizer2(h.T), w2)
            ax.set_title('overfitting', fontsize=14)
        if ax == ax3:
            z = model3(normalizer3(h.T), w3)
            ax.set_title(r'"good"', fontsize=14)
        z = np.sign(z)

        # reshape it
        s.shape = (np.size(r1), np.size(r2))
        t.shape = (np.size(r1), np.size(r2))
        z.shape = (np.size(r1), np.size(r2))

        #### plot contour, color regions ####
        ax.contour(s, t, z, colors='k', linewidths=2.5, levels=[0], zorder=2)
        ax.contourf(s, t, z, colors=[self.colors[1], self.colors[0]],
                    alpha=0.15, levels=range(-1, 2))
gen_subspace_dim, dsc_subspace_dim = 100, 1000
gen_subs_weights, dsc_subs_weights = np.zeros(gen_subspace_dim), np.zeros(dsc_subspace_dim)
seed = npr.RandomState(0)

# Training parameters
param_scale = 0.1
batch_size = 77
num_epochs = 5000
step_size_max = 0.001
step_size_min = 0.001

# Initialize gen & dsc params
gen_layer_sizes = [latent_dim, 20, 20, data_dim]
init_gen_params = init_random_params(param_scale, gen_layer_sizes)
num_gen_params = gen_subspace_dim if subspace_training else np.size(flatten(init_gen_params)[0])
print("num gen params: " + str(num_gen_params))

if show_gen_params:
    dsc_input_size = data_dim + num_gen_params
else:
    dsc_input_size = data_dim
dsc_layer_sizes = [dsc_input_size, 30, 20, latent_dim]
init_dsc_params = init_random_params(param_scale, dsc_layer_sizes)

# Draw random subspace matrices
gen_subs_project = sample_subs_projections(gen_layer_sizes, gen_subspace_dim,
                                           subspace_training, rs=seed)
dsc_subs_project = sample_subs_projections(dsc_layer_sizes, dsc_subspace_dim,
                                           subspace_training, rs=seed)
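# One plausible reading of the sample_subs_projections helper, in the spirit
# of subspace ("intrinsic dimension") training: a fixed random projection per
# weight matrix and bias, mapping a low-dimensional subspace vector into full
# parameter space. This is an assumption about the helper, not its source.
def sample_subs_projections_sketch(layer_sizes, subspace_dim, subspace_training, rs):
    if not subspace_training:
        return None
    projections = []
    for m, n in zip(layer_sizes[:-1], layer_sizes[1:]):
        P_W = rs.randn(subspace_dim, m * n) / np.sqrt(subspace_dim)  # weights
        P_b = rs.randn(subspace_dim, n) / np.sqrt(subspace_dim)      # biases
        projections.append((P_W, P_b))
    return projections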
def fit_weights_and_save(weights_file,
                         ca_data_file='rs_vm_denoise_200605.npy',
                         vip_silencing_data_file='vip_halo_data_for_sim.npy',
                         vip_activation_data_file='vip_chrimson_data_for_sim.npy',
                         sst_silencing_data_file='sst_halo_data_for_sim.npy',
                         constrain_wts=None, allow_var=True, fit_s02=True,
                         constrain_isn=True, l2_penalty=0.01, init_noise=0.1,
                         init_W_from_lsq=False, scale_init_by=1,
                         init_W_from_file=False, init_file=None):

    nsize, ncontrast = 6, 6

    # In[3]:

    npfile = np.load(ca_data_file, allow_pickle=True)[()]  #,{'rs':rs,'rs_denoise':rs_denoise},allow_pickle=True)
    rs = npfile['rs']
    rs_denoise = npfile['rs_denoise']

    # In[4]:

    nsize, ncontrast, ndir = 6, 6, 8
    ori_dirs = [[0, 4], [2, 6]]  #[[0,4],[1,3,5,7],[2,6]]
    nT = len(ori_dirs)
    nS = len(rs_denoise[0])

    def sum_to_1(r):
        R = r.reshape((r.shape[0], -1))
        #R = R/np.nansum(R[:,~np.isnan(R.sum(0))],axis=1)[:,np.newaxis]
        R = R / np.nansum(R, axis=1)[:, np.newaxis]  # changed 8/28
        return R

    def norm_to_mean(r):
        R = r.reshape((r.shape[0], -1))
        R = R / np.nanmean(R[:, ~np.isnan(R.sum(0))], axis=1)[:, np.newaxis]
        return R

    Rs = [[None, None] for i in range(len(rs))]
    Rso = [[[None for iT in range(nT)] for iS in range(nS)]
           for icelltype in range(len(rs))]
    rso = [[[None for iT in range(nT)] for iS in range(nS)]
           for icelltype in range(len(rs))]

    for iR, r in enumerate(rs):  #rs_denoise):
        print(iR)
        for ialign in range(nS):
            Rs[iR][ialign] = sum_to_1(r[ialign][:, :nsize, :])
            # Rs[iR][ialign] = von_mises_denoise(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir)))

    kernel = np.ones((1, 2, 2))
    kernel = kernel / kernel.sum()

    for iR, r in enumerate(rs):
        for ialign in range(nS):
            for iori in range(nT):
                Rso[iR][ialign][iori] = np.nanmean(
                    Rs[iR][ialign].reshape((-1, nsize, ncontrast, ndir))[:, :, :, ori_dirs[iori]], -1)
                Rso[iR][ialign][iori][:, :, 0] = np.nanmean(
                    Rso[iR][ialign][iori][:, :, 0], 1)[:, np.newaxis]
                Rso[iR][ialign][iori][:, 1:, 1:] = ssi.convolve(
                    Rso[iR][ialign][iori], kernel, 'valid')
                Rso[iR][ialign][iori] = Rso[iR][ialign][iori].reshape(
                    Rso[iR][ialign][iori].shape[0], -1)

    #kernel = np.ones((1,2,2))
    #kernel = kernel/kernel.sum()
    #
    #for iR,r in enumerate(rs):
    #    for ialign in range(nS):
    #        for iori in range(nT):
    #            Rso[iR][ialign][iori] = np.nanmean(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir))[:,:,:,ori_dirs[iori]],-1)
    #            Rso[iR][ialign][iori] = ssi.convolve(Rso[iR][ialign][iori],kernel,'same')
    #            Rso[iR][ialign][iori] = Rso[iR][ialign][iori].reshape(Rso[iR][ialign][iori].shape[0],-1)

    # In[6]:

    def set_bound(bd, code, val=0):
        # set bounds to val where True occurs in 'code'
        for iitem in range(len(bd)):
            bd[iitem][code[iitem]] = val

    # In[7]:

    nN = 36
    nS = 2
    nP = 2
    nT = 2
    nQ = 4

    # code for bounds: 0   , constrained to 0
    #                  +/-1, constrained to +/-1
    #                  1.5 , constrained to [0,1]
    #                  2   , constrained to [0,inf)
    #                  -2  , constrained to (-inf,0]
    #                  3   , unconstrained
    Wmx_bounds = 3 * np.ones((nP, nQ), dtype=int)
    Wmx_bounds[0, 1] = 0  # SSTs don't receive L4 input

    if allow_var:
        Wsx_bounds = 3 * np.ones(Wmx_bounds.shape)  #Wmx_bounds.copy()*0 #np.zeros_like(Wmx_bounds)
        Wsx_bounds[0, 1] = 0
    else:
        Wsx_bounds = np.zeros(Wmx_bounds.shape)  #Wmx_bounds.copy()*0 #np.zeros_like(Wmx_bounds)

    Wmy_bounds = 3 * np.ones((nQ, nQ), dtype=int)
    Wmy_bounds[0, :] = 2    # PCs are excitatory
    Wmy_bounds[1:, :] = -2  # all the cell types except PCs are inhibitory
    Wmy_bounds[1, 1] = 0    # SSTs don't inhibit themselves
    # Wmy_bounds[3,1] = 0  # PVs are allowed to inhibit SSTs, consistent with
    # Hillel's unpublished results, but not consistent with Pfeffer et al.
    Wmy_bounds[2, 0] = 0    # VIPs don't inhibit L2/3 PCs. According to Pfeffer
    # et al., only L5 PCs were found to get VIP inhibition

    if allow_var:
        Wsy_bounds = 3 * np.ones(Wmy_bounds.shape)  #Wmy_bounds.copy()*0 #np.zeros_like(Wmy_bounds)
        Wsy_bounds[1, 1] = 0
        Wsy_bounds[3, 1] = 0
        Wsy_bounds[2, 0] = 0
    else:
        Wsy_bounds = np.zeros(Wmy_bounds.shape)  #Wmy_bounds.copy()*0 #np.zeros_like(Wmy_bounds)

    if constrain_wts is not None:
        for wt in constrain_wts:
            Wmy_bounds[wt[0], wt[1]] = 0
            Wsy_bounds[wt[0], wt[1]] = 0

    def tile_nS_nT_nN(kernel):
        row = np.concatenate([kernel for idim in range(nS * nT)], axis=0)[np.newaxis, :]
        tiled = np.concatenate([row for irow in range(nN)], axis=0)
        return tiled

    if fit_s02:
        s02_bounds = 2 * np.ones((nQ,))  # permitting noise as a free parameter
    else:
        s02_bounds = np.ones((nQ,))

    k_bounds = 1.5 * np.ones((nQ,))
    kappa_bounds = np.ones((1,))
    # kappa_bounds = 2*np.ones((1,))

    T_bounds = 1.5 * np.ones((nQ,))

    X_bounds = tile_nS_nT_nN(np.array([2, 1]))
    # X_bounds = np.array([np.array([2,1,2,1])]*nN)

    Xp_bounds = tile_nS_nT_nN(np.array([3, 1]))
    # Xp_bounds = np.array([np.array([3,1,3,1])]*nN)

    # Y_bounds = tile_nS_nT_nN(2*np.ones((nQ,)))
    # Y_bounds = 2*np.ones((nN,nT*nS*nQ))

    Eta_bounds = tile_nS_nT_nN(3 * np.ones((nQ,)))
    # Eta_bounds = 3*np.ones((nN,nT*nS*nQ))

    if allow_var:
        Xi_bounds = tile_nS_nT_nN(3 * np.ones((nQ,)))
    else:
        Xi_bounds = tile_nS_nT_nN(np.zeros((nQ,)))
    # Xi_bounds = 3*np.ones((nN,nT*nS*nQ))

    h1_bounds = -2 * np.ones((1,))
    h2_bounds = 2 * np.ones((1,))
    h3_bounds = -2 * np.ones((1,))

    # In[8]:

    # shapes = [(nP,nQ),(nQ,nQ),(nP,nQ),(nQ,nQ),(nQ,),(nQ,),(1,),(nN,nS*nP),(nN,nS*nQ),(nN,nS*nQ),(nN,nS*nQ)]
    shapes = [(nP, nQ), (nQ, nQ), (nP, nQ), (nQ, nQ), (nQ,), (nQ,), (1,),
              (nQ,), (nN, nT * nS * nP), (nN, nT * nS * nP),
              (nN, nT * nS * nQ), (nN, nT * nS * nQ), (1,), (1,), (1,),
              (nN, nT * nS * nQ), (nN, nT * nS * nQ), (nN, nT * nS * nQ)]
    print('size of shapes: ' + str(np.sum([np.prod(shp) for shp in shapes])))
    # Wmx, Wmy, Wsx, Wsy, s02, k, kappa, T, XX, XXp, Eta, Xi, h1, h2, h3, Eta1, Eta2, Eta3

    lb = [-np.inf * np.ones(shp) for shp in shapes]
    ub = [np.inf * np.ones(shp) for shp in shapes]
    bdlist = [Wmx_bounds, Wmy_bounds, Wsx_bounds, Wsy_bounds, s02_bounds,
              k_bounds, kappa_bounds, T_bounds, X_bounds, Xp_bounds,
              Eta_bounds, Xi_bounds, h1_bounds, h2_bounds, h3_bounds,
              Eta_bounds, Eta_bounds, Eta_bounds]

    set_bound(lb, [bd == 0 for bd in bdlist], val=0)
    set_bound(ub, [bd == 0 for bd in bdlist], val=0)

    set_bound(lb, [bd == 2 for bd in bdlist], val=0)
    set_bound(ub, [bd == -2 for bd in bdlist], val=0)

    set_bound(lb, [bd == 1 for bd in bdlist], val=1)
    set_bound(ub, [bd == 1 for bd in bdlist], val=1)

    set_bound(lb, [bd == 1.5 for bd in bdlist], val=0)
    set_bound(ub, [bd == 1.5 for bd in bdlist], val=1)

    set_bound(lb, [bd == -1 for bd in bdlist], val=-1)
    set_bound(ub, [bd == -1 for bd in bdlist], val=-1)

    # for bd in [lb,ub]:
    #     for ind in [2,3]:
    #         bd[ind][:,1] = 0

    # temporary for no variation expt.:
    # lb[2] = np.zeros_like(lb[2])
    # lb[3] = np.zeros_like(lb[3])
    # lb[4] = np.ones_like(lb[4])
    # lb[5] = np.zeros_like(lb[5])
    # ub[2] = np.zeros_like(ub[2])
    # ub[3] = np.zeros_like(ub[3])
    # ub[4] = np.ones_like(ub[4])
    # ub[5] = np.ones_like(ub[5])

    lb = np.concatenate([a.flatten() for a in lb])
    ub = np.concatenate([b.flatten() for b in ub])
    bounds = [(a, b) for a, b in zip(lb, ub)]

    # In[10]:

    nS = 2
    ndims = 5
    ncelltypes = 5
    Yhat = [[None for iT in range(nT)] for iS in range(nS)]
    Xhat = [[None for iT in range(nT)] for iS in range(nS)]
    Ypc_list = [[None for iT in range(nT)] for iS in range(nS)]
    Xpc_list = [[None for iT in range(nT)] for iS in range(nS)]
    mx = [None for iS in range(nS)]
    for iS in range(nS):
        mx[iS] = np.zeros((ncelltypes,))
        yy = [None for icelltype in range(ncelltypes)]
        for icelltype in range(ncelltypes):
            yy[icelltype] = np.nanmean(Rso[icelltype][iS][0], 0)
            mx[iS][icelltype] = np.nanmax(yy[icelltype])
        for iT in range(nT):
            y = [np.nanmean(Rso[icelltype][iS][iT], axis=0)[:, np.newaxis] / mx[iS][icelltype]
                 for icelltype in range(1, ncelltypes)]
            Ypc_list[iS][iT] = [None for icelltype in range(1, ncelltypes)]
            for icelltype in range(1, ncelltypes):
                rss = Rso[icelltype][iS][iT].copy()  #/mx[iS][icelltype] #.reshape(Rs[icelltype][ialign].shape[0],-1)
                #rss = Rso[icelltype][iS][iT].copy() #.reshape(Rs[icelltype][ialign].shape[0],-1)
                rss = rss[np.isnan(rss).sum(1) == 0]
                # print(rss.max())
                # rss[rss<0] = 0
                # rss = rss[np.random.randn(rss.shape[0])>0]
                try:
                    u, s, v = np.linalg.svd(rss - np.mean(rss, 0)[np.newaxis])
                    Ypc_list[iS][iT][icelltype - 1] = [(s[idim], v[idim])
                                                       for idim in range(ndims)]
                    # print('yep on Y')
                    # print(np.min(np.sum(rs[icelltype][iS][iT],axis=1)))
                except:
                    # print('nope on Y')
                    print(np.mean(np.isnan(rss)))
                    print(np.min(np.sum(rs[icelltype][iS][iT], axis=1)))
            Yhat[iS][iT] = np.concatenate(y, axis=1)
            # x = sim_utils.columnize(Rso[0][iS][iT])[:,np.newaxis]
            icelltype = 0
            #x = np.nanmean(Rso[icelltype][iS][iT],0)[:,np.newaxis]#/mx[iS][icelltype]
            x = np.nanmean(Rso[icelltype][iS][iT], 0)[:, np.newaxis] / mx[iS][icelltype]
            # opto_column = np.concatenate((np.zeros((nN,)),np.zeros((nNO/2,)),np.ones((nNO/2,))),axis=0)[:,np.newaxis]
            Xhat[iS][iT] = np.concatenate((x, np.ones_like(x)), axis=1)
            # Xhat[iS][iT] = np.concatenate((x,np.ones_like(x),opto_column),axis=1)
            icelltype = 0
            #rss = Rso[icelltype][iS][iT].copy()/mx[iS][icelltype]
            rss = Rso[icelltype][iS][iT].copy()
            rss = rss[np.isnan(rss).sum(1) == 0]
            # try:
            u, s, v = np.linalg.svd(rss - rss.mean(0)[np.newaxis])
            Xpc_list[iS][iT] = [None for iinput in range(2)]
            Xpc_list[iS][iT][0] = [(s[idim], v[idim]) for idim in range(ndims)]
            Xpc_list[iS][iT][1] = [(0, np.zeros((Xhat[0][0].shape[0],)))
                                   for idim in range(ndims)]
            # except:
            #     print('nope on X')
            #     print(np.mean(np.isnan(rss)))
            #     print(np.min(np.sum(Rso[icelltype][iS][iT],axis=1)))

    nN, nP = Xhat[0][0].shape
    nQ = Yhat[0][0].shape[1]

    # In[11]:

    def compute_f_(Eta, Xi, s02):
        return sim_utils.f_miller_troyer(
            Eta, Xi**2 + np.concatenate([s02 for ipixel in range(nS * nT)]))

    def compute_fprime_m_(Eta, Xi, s02):
        return sim_utils.fprime_miller_troyer(
            Eta, Xi**2 + np.concatenate([s02 for ipixel in range(nS * nT)])) * Xi

    def compute_fprime_s_(Eta, Xi, s02):
        s2 = Xi**2 + np.concatenate((s02, s02), axis=0)
        return sim_utils.fprime_s_miller_troyer(Eta, s2) * (Xi / s2)

    def sorted_r_eigs(w):
        drW, prW = np.linalg.eig(w)
        srtinds = np.argsort(drW)
        return drW[srtinds], prW[:, srtinds]

    # In[12]:

    # 0.Wmx, 1.Wmy, 2.Wsx, 3.Wsy, 4.s02, 5.K, 6.kappa, 7.T, 8.XX, 9.XXp,
    # 10.Eta, 11.Xi, 12.h1, 13.h2, 14.h3, 15.Eta1, 16.Eta2, 17.Eta3
    shapes = [(nP, nQ), (nQ, nQ), (nP, nQ), (nQ, nQ), (nQ,), (nQ,), (1,),
              (nQ,), (nN, nT * nS * nP), (nN, nT * nS * nP),
              (nN, nT * nS * nQ), (nN, nT * nS * nQ), (1,), (1,), (1,),
              (nN, nT * nS * nQ), (nN, nT * nS * nQ), (nN, nT * nS * nQ)]
    print('size of shapes: ' + str(np.sum([np.prod(shp) for shp in shapes])))

    # In[13]:

    import calnet.fitting_spatial_feature
    import sim_utils

    # In[14]:

    opto_dict = np.load(vip_silencing_data_file, allow_pickle=True)[()]

    Yhat_opto = opto_dict['Yhat_opto']
    for iS in range(nS):
        mx = np.zeros((nQ,))
        for iQ in range(nQ):
            slicer = slice(nQ * nT * iS + iQ, nQ * nT * (1 + iS), nQ)
            mx[iQ] = np.nanmax(Yhat_opto[0::2][:, slicer])
            Yhat_opto[:, slicer] = Yhat_opto[:, slicer] / mx[iQ]
    #Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    print(Yhat_opto.shape)
    h_opto = opto_dict['h_opto']

    dYY1 = Yhat_opto[1::2] - Yhat_opto[0::2]
    for to_overwrite in [1, 2, 5, 6]:  # overwrite sst and vip with off-centered values
        dYY1[:, to_overwrite] = dYY1[:, to_overwrite + 8]
    for to_overwrite in [11, 15]:
        dYY1[:, to_overwrite] = np.nan  #dYY1[:,to_overwrite-8]

    opto_dict = np.load(vip_activation_data_file, allow_pickle=True)[()]

    Yhat_opto = opto_dict['Yhat_opto']
    for iS in range(nS):
        mx = np.zeros((nQ,))
        for iQ in range(nQ):
            slicer = slice(nQ * nT * iS + iQ, nQ * nT * (1 + iS), nQ)
            mx[iQ] = np.nanmax(Yhat_opto[0::2][:, slicer])
            Yhat_opto[:, slicer] = Yhat_opto[:, slicer] / mx[iQ]
    #Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    print(Yhat_opto.shape)
    h_opto = opto_dict['h_opto']

    dYY2 = Yhat_opto[1::2] - Yhat_opto[0::2]

    opto_dict = np.load(sst_silencing_data_file, allow_pickle=True)[()]

    Yhat_opto = opto_dict['Yhat_opto']
    for iS in range(nS):
        mx = np.zeros((nQ,))
        for iQ in range(nQ):
            slicer = slice(nQ * nT * iS + iQ, nQ * nT * (1 + iS), nQ)
            mx[iQ] = np.nanmax(Yhat_opto[0::2][:, slicer])
            Yhat_opto[:, slicer] = Yhat_opto[:, slicer] / mx[iQ]
    #Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    print(Yhat_opto.shape)
    h_opto = opto_dict['h_opto']

    dYY3 = Yhat_opto[1::2] - Yhat_opto[0::2]

    print('dYY1 mean: %03f' % np.nanmean(np.abs(dYY1)))
    print('dYY2 mean: %03f' % np.nanmean(np.abs(dYY2)))

    dYY = np.concatenate((dYY1, dYY2, dYY3), axis=0)

    opto_mask = ~np.isnan(dYY)
    dYY[~opto_mask] = 0

    np.save('/Users/dan/Documents/notebooks/mossing-PC/shared_data/calnet_data/dYY.npy', dYY)

    # In[ ]:

    from importlib import reload
    reload(calnet)
    #reload(calnet.fitting_spatial_feature_opto_nonlinear)
    reload(sim_utils)
    # reload(calnet.fitting_spatial_feature)
    # W0list = [np.ones(shp) for shp in shapes]
    wt_dict = {}
    wt_dict['X'] = 1
    wt_dict['Y'] = 5
    wt_dict['Eta'] = 10  # 1 #
    wt_dict['Xi'] = 0.1
    wt_dict['stims'] = np.ones((nN, 1))  #(np.arange(30)/30)[:,np.newaxis]**1 #
    wt_dict['barrier'] = 0.  #30.0 #0.1
    wt_dict['opto'] = 1e-1  #1e1
    wt_dict['isn'] = 3
    wt_dict['dYY'] = 300  #1000
    wt_dict['Eta12'] = 100
    wt_dict['EtaTV'] = 0.3
    wt_dict['coupling'] = 0

    YYhat = calnet.fitting_spatial_feature_opto_nonlinear_tridi.flatten_nested_list_of_2d_arrays(Yhat)
    XXhat = calnet.fitting_spatial_feature_opto_nonlinear_tridi.flatten_nested_list_of_2d_arrays(Xhat)
    Eta0 = invert_f_mt(YYhat)

    ntries = 1
    nhyper = 1
    dt = 1e-1
    niter = int(np.round(50 / dt))  #int(1e4)
    perturbation_size = 5e-2
    # learning_rate = 1e-4 # 1e-5 #np.linspace(3e-4,1e-3,niter+1) # 1e-5
    #l2_penalty = 0.1
    Wt = [[None for itry in range(ntries)] for ihyper in range(nhyper)]
    loss = np.zeros((nhyper, ntries))
    is_neg = np.array([b[1] for b in bounds]) == 0
    counter = 0
    negatize = [np.zeros(shp, dtype='bool') for shp in shapes]
    for ishp, shp in enumerate(shapes):
        nel = np.prod(shp)
        negatize[ishp][:][is_neg[counter:counter + nel].reshape(shp)] = True
        counter = counter + nel

    for ihyper in range(nhyper):
        for itry in range(ntries):
            print((ihyper, itry))
            W0list = [init_noise * (ihyper + 1) * np.random.rand(*shp) for shp in shapes]
            print('size of shapes: ' + str(np.sum([np.prod(shp) for shp in shapes])))
            print('size of w0: ' + str(np.sum([np.size(x) for x in W0list])))
            print('len(W0list) : ' + str(len(W0list)))
            counter = 0
            for ishp, shp in enumerate(shapes):
                W0list[ishp][negatize[ishp]] = -W0list[ishp][negatize[ishp]]
            W0list[4] = np.ones(shapes[4])  # s02
            W0list[5] = np.ones(shapes[5])  # K
            W0list[6] = np.ones(shapes[6])  # kappa
            W0list[7] = np.ones(shapes[7])  # T
            W0list[8] = np.concatenate(Xhat, axis=1)  # XX
            W0list[9] = np.zeros_like(W0list[8])      # XXp
            W0list[10] = Eta0.copy()  #np.zeros(shapes[10]) # Eta
            W0list[11] = np.zeros(shapes[11])  # Xi
            W0list[15] = Eta0.copy()  # Eta1
            W0list[16] = Eta0.copy()  # Eta2
            W0list[17] = Eta0.copy()  # Eta3
            #[Wmx,Wmy,Wsx,Wsy,s02,k,kappa,T,XX,XXp,Eta,Xi]
            # W0list = Wstar_dict['as_list'].copy()
            # W0list[1][1,0] = -1.5
            # W0list[1][3,0] = -1.5
            if init_W_from_lsq:
                W0list[0], W0list[1] = initialize_W(Xhat, Yhat, scale_by=scale_init_by)
                for ivar in range(0, 2):
                    W0list[ivar] = W0list[ivar] + init_noise * np.random.randn(*W0list[ivar].shape)
            if constrain_isn:
                W0list[1][0, 0] = 3
                W0list[1][0, 3] = 5
                W0list[1][3, 0] = -5
                W0list[1][3, 3] = -5
            if init_W_from_file:
                npyfile = np.load(init_file, allow_pickle=True)[()]
                W0list = npyfile['as_list']
                if len(W0list) < len(shapes):
                    W0list = W0list + [np.array(0.7), -np.array(0.7),
                                       W0list[10].copy(), W0list[10].copy(),
                                       W0list[10].copy()]  # add h2, h3, Eta1, Eta2, Eta3
            # wt_dict['Xi'] = 10
            # wt_dict['Eta'] = 10
            print('size of bounds: ' + str(np.sum([np.size(x) for x in bdlist])))
            print('size of w0: ' + str(np.sum([np.size(x) for x in W0list])))
            print('size of shapes: ' + str(np.sum([np.prod(shp) for shp in shapes])))
            Wt[ihyper][itry], loss[ihyper][itry], gr, hess, result = \
                calnet.fitting_spatial_feature_opto_nonlinear_tridi.fit_W_sim(
                    Xhat, Xpc_list, Yhat, Ypc_list,
                    pop_rate_fn=sim_utils.f_miller_troyer,
                    pop_deriv_fn=sim_utils.fprime_miller_troyer,
                    neuron_rate_fn=sim_utils.evaluate_f_mt,
                    W0list=W0list.copy(), bounds=bounds, niter=niter,
                    wt_dict=wt_dict, l2_penalty=l2_penalty,
                    compute_hessian=False, dt=dt,
                    perturbation_size=perturbation_size, dYY=dYY,
                    constrain_isn=constrain_isn, opto_mask=opto_mask)
            # Wt[ihyper][itry] = [w[-1] for w in Wt_temp]
            # loss[ihyper,itry] = loss_temp[-1]

    # In[285]:

    def parse_W(W):
        Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, XX, XXp, Eta, Xi, h1, h2, h3, Eta1, Eta2, Eta3 = W
        return Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, XX, XXp, Eta, Xi, h1, h2, h3, Eta1, Eta2, Eta3

    itry = 0
    Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, XX, XXp, Eta, Xi, h1, h2, h3, Eta1, Eta2, Eta3 = parse_W(Wt[0][0])

    # In[286]:

    labels = ['Wmx', 'Wmy', 'Wsx', 'Wsy', 's02', 'K', 'kappa', 'T', 'XX',
              'XXp', 'Eta', 'Xi', 'h1', 'h2', 'h3', 'Eta1', 'Eta2', 'Eta3']
    Wstar_dict = {}
    for i, label in enumerate(labels):
        Wstar_dict[label] = Wt[0][0][i]
    Wstar_dict['as_list'] = [Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, XX, XXp,
                             Eta, Xi, h1, h2, h3, Eta1, Eta2, Eta3]
    Wstar_dict['loss'] = loss[0][0]
    Wstar_dict['wt_dict'] = wt_dict
    np.save(weights_file, Wstar_dict, allow_pickle=True)
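# Hedged driver sketch: how fit_weights_and_save is presumably invoked. The
# data-file arguments fall back to the defaults in the signature; the output
# path here is arbitrary.
if __name__ == '__main__':
    fit_weights_and_save('weights_tridi.npy', constrain_isn=True,
                         l2_penalty=0.01, init_noise=0.1)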
def draw_it(self, w_init, max_its, **kwargs):
    ### input arguments ###
    self.max_its = max_its
    self.grad = compute_grad(self.g)  # gradient of input function
    self.w_init = w_init

    if 'beta' in kwargs:
        self.beta = kwargs['beta']

    pts = 'off'
    if 'pts' in kwargs:
        pts = kwargs['pts']

    linewidth = 2.5
    if 'linewidth' in kwargs:
        linewidth = kwargs['linewidth']

    view = [20, -50]
    if 'view' in kwargs:
        view = kwargs['view']

    axes = False
    if 'axes' in kwargs:
        axes = kwargs['axes']

    plot_final = False
    if 'plot_final' in kwargs:
        plot_final = kwargs['plot_final']

    num_contours = 15
    if 'num_contours' in kwargs:
        num_contours = kwargs['num_contours']

    # get initial point
    self.w_init = w_init
    if np.size(self.w_init) == 2:
        self.w_init = np.asarray([float(s) for s in self.w_init])
    else:
        self.w_init = np.asarray([float(self.w_init)])

    # take in user defined maximum number of iterations
    self.max_its = max_its

    # construct figure
    fig, axs = plt.subplots(1, 2, figsize=(9, 4))

    # create subplot with 2 panels, plot input function in left plot
    gs = gridspec.GridSpec(1, 2, width_ratios=[2, 1])
    ax = plt.subplot(gs[0], aspect='equal')
    ax2 = plt.subplot(gs[1])  # ,sharey = ax);

    #### run local random search algorithm ####
    self.w_hist = []
    self.run_newtons_method()

    # colors for points
    s = np.linspace(0, 1, len(self.w_hist[:round(len(self.w_hist) / 2)]))
    s.shape = (len(s), 1)
    t = np.ones(len(self.w_hist[round(len(self.w_hist) / 2):]))
    t.shape = (len(t), 1)
    s = np.vstack((s, t))
    colorspec = []
    colorspec = np.concatenate((s, np.flipud(s)), 1)
    colorspec = np.concatenate((colorspec, np.zeros((len(s), 1))), 1)

    #### define input space for function and evaluate ####
    if np.size(self.w_init) == 2:  # function is multi-input, plot 3d function contour
        # set viewing limits on contour plot
        xvals = [self.w_hist[s][0] for s in range(len(self.w_hist))]
        xvals.append(self.w_init[0])
        yvals = [self.w_hist[s][1] for s in range(len(self.w_hist))]
        yvals.append(self.w_init[1])
        xmax = max(xvals)
        xmin = min(xvals)
        xgap = (xmax - xmin) * 0.1
        ymax = max(yvals)
        ymin = min(yvals)
        ygap = (ymax - ymin) * 0.1
        xmin -= xgap
        xmax += xgap
        ymin -= ygap
        ymax += ygap

        if 'xmin' in kwargs:
            xmin = kwargs['xmin']
        if 'xmax' in kwargs:
            xmax = kwargs['xmax']
        if 'ymin' in kwargs:
            ymin = kwargs['ymin']
        if 'ymax' in kwargs:
            ymax = kwargs['ymax']

        w1 = np.linspace(xmin, xmax, 400)
        w2 = np.linspace(ymin, ymax, 400)
        w1_vals, w2_vals = np.meshgrid(w1, w2)
        w1_vals.shape = (len(w1)**2, 1)
        w2_vals.shape = (len(w2)**2, 1)
        h = np.concatenate((w1_vals, w2_vals), axis=1)
        func_vals = np.asarray([self.g(s) for s in h])
        w1_vals.shape = (len(w1), len(w1))
        w2_vals.shape = (len(w2), len(w2))
        func_vals.shape = (len(w1), len(w2))

        ### make contour right plot - as well as horizontal and vertical axes ###
        # set level ridges
        levelmin = min(func_vals.flatten())
        levelmax = max(func_vals.flatten())
        cutoff = 0.5
        cutoff = (levelmax - levelmin) * cutoff
        numper = 3
        levels1 = np.linspace(cutoff, levelmax, numper)
        num_contours -= numper

        levels2 = np.linspace(levelmin, cutoff, min(num_contours, numper))
        levels = np.unique(np.append(levels1, levels2))
        num_contours -= numper
        while num_contours > 0:
            cutoff = levels[1]
            levels2 = np.linspace(levelmin, cutoff, min(num_contours, numper))
            levels = np.unique(np.append(levels2, levels))
            num_contours -= numper

        a = ax.contour(w1_vals, w2_vals, func_vals, levels=levels, colors='k')
        ax.contourf(w1_vals, w2_vals, func_vals, levels=levels, cmap='Blues')

        # plot points on contour
        for j in range(len(self.w_hist)):
            w_val = self.w_hist[j]
            g_val = self.g(w_val)

            # plot in left panel
            if pts == 'on':
                ax.scatter(w_val[0], w_val[1], s=30, c=colorspec[j],
                           edgecolor='k',
                           linewidth=1.5 * math.sqrt((1 / (float(j) + 1))),
                           zorder=3)
            ax2.scatter(j, g_val, s=30, c=colorspec[j], edgecolor='k',
                        linewidth=0.7, zorder=3)  # plot point of tangency

            # plot connector between points for visualization purposes
            if j > 0:
                w_old = self.w_hist[j - 1]
                w_new = self.w_hist[j]
                g_old = self.g(w_old)
                g_new = self.g(w_new)
                ax.plot([w_old[0], w_new[0]], [w_old[1], w_new[1]],
                        color=colorspec[j], linewidth=linewidth, alpha=1,
                        zorder=2)  # plot approx
                ax.plot([w_old[0], w_new[0]], [w_old[1], w_new[1]],
                        color='k', linewidth=linewidth + 0.4, alpha=1,
                        zorder=1)  # plot approx
                ax2.plot([j - 1, j], [g_old, g_new], color=colorspec[j],
                         linewidth=2, alpha=1, zorder=2)  # plot approx
                ax2.plot([j - 1, j], [g_old, g_new], color='k',
                         linewidth=2.5, alpha=1, zorder=1)  # plot approx

        # clean up panel
        ax.set_xlabel('$w_1$', fontsize=12)
        ax.set_ylabel('$w_2$', fontsize=12, rotation=0, labelpad=15)
        ax.axhline(y=0, color='k', zorder=0, linewidth=0.5)
        ax.axvline(x=0, color='k', zorder=0, linewidth=0.5)
        ax.set_xlim([xmin, xmax])
        ax.set_ylim([ymin, ymax])

        # set tickmarks
        ax.set_xticks(np.arange(round(xmin), round(xmax) + 1, 1.0))
        ax.set_yticks(np.arange(round(ymin), round(ymax) + 1, 1.0))

    else:  # function is single input, plot curve
        xmin = -2
        xmax = 2
        if 'xmin' in kwargs:
            xmin = kwargs['xmin']
        if 'xmax' in kwargs:
            xmax = kwargs['xmax']

        w_plot = np.linspace(xmin, xmax, 500)
        g_plot = np.asarray([self.g(s) for s in w_plot])
        ax.plot(w_plot, g_plot, color='k', linewidth=2, zorder=2)

        # set viewing limits
        ymin = min(g_plot)
        ymax = max(g_plot)
        ygap = (ymax - ymin) * 0.2
        ymin -= ygap
        ymax += ygap
        ax.set_ylim([ymin, ymax])

        # clean up panel
        ax.axhline(y=0, color='k', zorder=1, linewidth=0.25)
        ax.axvline(x=0, color='k', zorder=1, linewidth=0.25)
        ax.set_xlabel(r'$w$', fontsize=13)
        ax.set_ylabel(r'$g(w)$', fontsize=13, rotation=0, labelpad=25)

        # function single-input, plot input and evaluation points on function
        for j in range(len(self.w_hist)):
            w_val = self.w_hist[j]
            g_val = self.g(w_val)

            ax.scatter(w_val, g_val, s=90, c=colorspec[j], edgecolor='k',
                       linewidth=0.5 * ((1 / (float(j) + 1)))**(0.4),
                       zorder=3, marker='X')  # evaluation on function
            ax.scatter(w_val, 0, s=90, facecolor=colorspec[j], edgecolor='k',
                       linewidth=0.5 * ((1 / (float(j) + 1)))**(0.4), zorder=3)
            ax2.scatter(j, g_val, s=30, c=colorspec[j], edgecolor='k',
                        linewidth=0.7, zorder=3)  # plot point of tangency

            # plot connector between points for visualization purposes
            if j > 0:
                w_old = self.w_hist[j - 1][0]
                w_new = self.w_hist[j][0]
                g_old = self.g(w_old)
                g_new = self.g(w_new)
                ax2.plot([j - 1, j], [g_old, g_new], color=colorspec[j],
                         linewidth=2, alpha=1, zorder=2)  # plot approx
                ax2.plot([j - 1, j], [g_old, g_new], color='k',
                         linewidth=2.5, alpha=1, zorder=1)  # plot approx

    # clean panels
    ax2.axhline(y=0, color='k', zorder=0, linewidth=0.5)
    ax2.set_xlabel('iteration', fontsize=12)
    ax2.set_ylabel(r'$g(w)$', fontsize=12, rotation=0, labelpad=25)
    ax.set(aspect='equal')
    a = ax.get_position()
    yr = ax.get_position().y1 - ax.get_position().y0
    xr = ax.get_position().x1 - ax.get_position().x0
    aspectratio = 1.25 * xr / yr  # + min(xr,yr)
    ratio_default = (ax2.get_xlim()[1] - ax2.get_xlim()[0]) / (ax2.get_ylim()[1] - ax2.get_ylim()[0])
    ax2.set_aspect(ratio_default * aspectratio)

    # plot
    plt.show()
def fit(self, **kwargs):
    # basic parameters for gradient descent run (default algorithm)
    self.max_its = 500
    self.alpha_choice = 10**(-1)
    self.w_init = self.initializer()
    optimizer = 'gradient_descent'
    epsilon = 10**(-10)

    # set parameters by hand
    if 'max_its' in kwargs:
        self.max_its = kwargs['max_its']
    if 'alpha_choice' in kwargs:
        self.alpha_choice = kwargs['alpha_choice']
    if 'optimizer' in kwargs:
        optimizer = kwargs['optimizer']
    if 'epsilon' in kwargs:
        epsilon = kwargs['epsilon']
    if 'init' in kwargs:
        self.w_init = kwargs['init']

    # batch size for gradient descent?
    self.num_pts = np.size(self.y_train)
    self.batch_size = np.size(self.y_train)
    if 'batch_size' in kwargs:
        self.batch_size = kwargs['batch_size']

    # optimize
    weight_history = []

    # run gradient descent
    if optimizer == 'gradient_descent':
        weight_history = optimizers.gradient_descent(self.cost, self.alpha_choice,
                                                     self.max_its, self.w_init,
                                                     self.num_pts, self.batch_size)
    if optimizer == 'newtons_method':
        weight_history = optimizers.newtons_method(self.cost, self.max_its,
                                                   self.w_init, self.num_pts,
                                                   self.batch_size,
                                                   epsilon=epsilon)

    # compute training and validation cost histories
    train_cost_history = [self.cost(v, np.arange(np.size(self.y_train)))
                          for v in weight_history]
    valid_cost_history = [self.valid_cost(v, np.arange(np.size(self.y_valid)))
                          for v in weight_history]

    # store all new histories
    self.weight_histories.append(weight_history)
    self.train_cost_histories.append(train_cost_history)
    self.valid_cost_histories.append(valid_cost_history)

    # if classification, produce count history
    if self.cost_name in ('softmax', 'perceptron', 'multiclass_softmax',
                          'multiclass_perceptron'):
        train_count_history = [self.counter(v) for v in weight_history]
        valid_count_history = [self.valid_counter(v) for v in weight_history]

        # store count history
        self.train_count_histories.append(train_count_history)
        self.valid_count_histories.append(valid_count_history)
def animate_it_3d(self, w_hist, **kwargs):
    self.w_hist = w_hist

    ##### setup figure to plot #####
    # initialize figure
    fig = plt.figure(figsize=(8, 3))
    artist = fig

    # create subplot with two panels: the 3d fit on the left, cost history on the right
    gs = gridspec.GridSpec(1, 2, width_ratios=[2, 1])
    ax1 = plt.subplot(gs[0], projection='3d')
    ax2 = plt.subplot(gs[1])

    # produce color scheme
    s = np.linspace(0, 1, len(self.w_hist[:round(len(self.w_hist) / 2)]))
    s.shape = (len(s), 1)
    t = np.ones(len(self.w_hist[round(len(self.w_hist) / 2):]))
    t.shape = (len(t), 1)
    s = np.vstack((s, t))
    self.colorspec = np.concatenate((s, np.flipud(s)), 1)
    self.colorspec = np.concatenate((self.colorspec, np.zeros((len(s), 1))), 1)

    # seed left panel plotting range
    viewmax = 3
    if 'viewmax' in kwargs:
        viewmax = kwargs['viewmax']
    r = np.linspace(-viewmax, viewmax, 200)

    # create grid from plotting range
    x1_vals, x2_vals = np.meshgrid(r, r)
    x1_vals.shape = (np.size(r), np.size(r))
    x2_vals.shape = (np.size(r), np.size(r))

    # seed left panel view
    view = [20, 50]
    if 'view' in kwargs:
        view = kwargs['view']

    # set z-axis to the left
    self.move_axis_left(ax1)

    # start animation
    num_frames = len(self.w_hist)
    print('starting animation rendering...')

    def animate(k):
        # clear left panel and reset its axis
        ax1.cla()
        self.move_axis_left(ax1)

        # current color
        color = self.colorspec[k]

        # print rendering update
        if np.mod(k + 1, 25) == 0:
            print('rendering animation frame ' + str(k + 1) + ' of ' + str(num_frames))
        if k == num_frames - 1:
            print('animation rendering complete!')
            time.sleep(1.5)
            clear_output()

        ###### make left panel - plot data and fit ######
        # initialize fit
        w = self.w_hist[k]

        # reshape and plot the surface, as well as where the zero-plane is
        y_fit = w[0] + w[1] * x1_vals + w[2] * x2_vals

        # plot cost surface
        ax1.plot_surface(x1_vals, x2_vals, y_fit, alpha=0.1, color=color,
                         rstride=25, cstride=25, linewidth=0.25, edgecolor='k', zorder=2)

        # scatter data
        self.scatter_pts(ax1)
        # ax1.view_init(view[0], view[1])

        # plot connector between points for visualization purposes
        if k == 0:
            w_new = self.w_hist[k]
            g_new = self.least_squares(w_new)[0]
            ax2.scatter(k, g_new, s=0.1, color='w', linewidth=2.5, alpha=0, zorder=1)
        if k > 0:
            w_old = self.w_hist[k - 1]
            w_new = self.w_hist[k]
            g_old = self.least_squares(w_old)[0]
            g_new = self.least_squares(w_new)[0]
            ax2.plot([k - 1, k], [g_old, g_new], color=color, linewidth=2.5, alpha=1, zorder=2)
            ax2.plot([k - 1, k], [g_old, g_new], color='k', linewidth=3.5, alpha=1, zorder=1)

        # set viewing limits for second panel
        ax2.axhline(y=0, color='k', zorder=0, linewidth=0.5)
        ax2.set_xlabel('iteration', fontsize=12)
        ax2.set_ylabel(r'$g(\mathbf{w})$', fontsize=12, rotation=0, labelpad=25)
        ax2.set_xlim([-0.5, len(self.w_hist)])

        return artist,

    anim = animation.FuncAnimation(fig, animate, frames=num_frames,
                                   interval=num_frames, blit=True)
    return anim
def train(x, y, **kwargs):
    # get and run optimizer to solve the one-versus-all two-class subproblems
    N = np.shape(x)[0]
    C = np.size(np.unique(y))
    max_its = 100
    alpha_choice = 1
    cost_name = 'softmax'
    w = 0.1 * np.random.randn(N + 1, 1)
    optimizer = 'gradient_descent'

    # switches for user choices
    if 'max_its' in kwargs:
        max_its = kwargs['max_its']
    if 'alpha_choice' in kwargs:
        alpha_choice = kwargs['alpha_choice']
    if 'cost_name' in kwargs:
        cost_name = kwargs['cost_name']
    if 'w' in kwargs:
        w = kwargs['w']
    if 'optimizer' in kwargs:
        optimizer = kwargs['optimizer']
    epsilon = 10**(-7)
    if 'epsilon' in kwargs:
        epsilon = kwargs['epsilon']

    # loop over subproblems and solve
    weight_histories = []
    for c in range(0, C):
        # prepare temporary C vs notC sub-problem labels
        y_temp = copy.deepcopy(y)
        ind = np.argwhere(y_temp.astype(int) == c)[:, 1]
        ind2 = np.argwhere(y_temp.astype(int) != c)[:, 1]
        y_temp[0, ind] = 1
        y_temp[0, ind2] = -1

        # build cost function for this subproblem
        cost = cost_lib.choose_cost(x, y_temp, cost_name)

        # run optimizer
        weight_history = 0
        cost_history = 0
        if optimizer == 'gradient_descent':
            weight_history, cost_history = optimizers.gradient_descent(cost, alpha_choice, max_its, w)
        if optimizer == 'newtons_method':
            weight_history, cost_history = optimizers.newtons_method(cost, max_its, w=w, epsilon=epsilon)

        # store each weight history
        weight_histories.append(copy.deepcopy(weight_history))

    # combine the individual classifier weights into a single weight matrix per step
    R = len(weight_histories[0])
    combined_weights = []
    for r in range(R):
        a = np.array([weight_histories[c][r] for c in range(C)]).T
        a = a[0, :, :]
        combined_weights.append(a)

    # run combined weight matrices through the fusion rule to count
    # misclassifications per step
    counter = cost_lib.choose_cost(x, y, 'multiclass_counter')
    count_history = [counter(v) for v in combined_weights]
    return combined_weights, count_history
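# NOTE: a minimal usage sketch of the one-versus-all train function above.
# Shapes follow the conventions used inside it: x is (N, P) with points as
# columns, y is a (1, P) row of integer class labels. The toy data below is
# hypothetical, and cost_lib / optimizers must already be importable.
import numpy as np

x = np.random.randn(2, 30)                  # N = 2 features, P = 30 points
y = np.random.randint(0, 3, size=(1, 30))   # C = 3 classes
combined_weights, count_history = train(x, y, max_its=50, alpha_choice=0.1)
print(count_history[-1], 'misclassifications at the final step')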
def newtons_method(self, g, win, **kwargs):
    # flatten input function for a simpler-written descent loop
    self.g, unflatten, w = flatten_func(g, win)
    self.grad = compute_grad(self.g)
    self.hess = compute_hess(self.g)

    # parse optional arguments
    max_its = 20
    if 'max_its' in kwargs:
        max_its = kwargs['max_its']
    self.epsilon = 10**-10
    if 'epsilon' in kwargs:
        self.epsilon = kwargs['epsilon']
    verbose = True
    if 'verbose' in kwargs:
        verbose = kwargs['verbose']
    output = 'history'
    if 'output' in kwargs:
        output = kwargs['output']
    self.counter = copy.deepcopy(self.g)
    if 'counter' in kwargs:
        counter = kwargs['counter']
        self.counter, unflatten, w = flatten_func(counter, win)

    # start newton's method loop
    if verbose:
        print('starting optimization...')
    geval_old = self.g(w)
    self.w_best = unflatten(copy.deepcopy(w))
    g_best = self.counter(w)

    # create container for weight history
    w_hist = []
    if output == 'history':
        w_hist.append(unflatten(w))

    # loop
    for k in range(max_its):
        # compute gradient and hessian
        grad_val = self.grad(w)
        hess_val = self.hess(w)
        hess_val.shape = (np.size(w), np.size(w))

        # solve linear system for new weights: C w_new = C w - grad
        C = hess_val + self.epsilon * np.eye(np.size(w))
        w = np.linalg.solve(C, np.dot(C, w) - grad_val)

        # eject from the process if the system is becoming singular
        # (detected via an increasing cost value)
        geval_new = self.g(w)
        if k > 2 and geval_new > geval_old:
            print('singular system reached')
            time.sleep(1.5)
            clear_output()
            if output == 'history':
                return w_hist
            elif output == 'best':
                return self.w_best
        else:
            geval_old = geval_new

        # record current weights
        if output == 'best':
            if self.g(w) < g_best:
                g_best = self.counter(w)
                self.w_best = copy.deepcopy(unflatten(w))
        w_hist.append(unflatten(w))

    if verbose:
        print('...optimization complete!')
        time.sleep(1.5)
        clear_output()

    if output == 'best':
        return self.w_best
    elif output == 'history':
        return w_hist
def least_squares(self, w):
    # mean squared error between model predictions and true outputs
    cost = np.sum((self.model(self.x, w) - self.y)**2)
    return cost / float(np.size(self.y))
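# NOTE: a minimal worked example of the least-squares cost above, assuming a
# linear model of the form model(x, w) = w[0] + w[1]*x; the data and weights
# here are hypothetical, purely for illustration.
import numpy as np

x = np.array([0.0, 1.0, 2.0])
y = np.array([1.0, 3.0, 5.0])

def model(x, w):
    return w[0] + w[1] * x

w = np.array([1.0, 2.0])
cost = np.sum((model(x, w) - y)**2) / float(np.size(y))
print(cost)  # 0.0 -- this line fits the three points exactly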
# draw the arm link by link, starting from the origin
pos = np.array([0, 0, 0])
qM = np.eye(3)
rs = x0[:N]
ps = x0[N:2*N]
ys = x0[2*N:]
ll = link_lengths
for r, p, y, l in zip(rs, ps, ys, ll):
    pos0 = pos
    pos, qM = fwd(pos, l, r, p, y, qM)
    ax.plot([pos0[0], pos[0]], [pos0[1], pos[1]], [pos0[2], pos[2]])
    print(pos, np.sqrt(((pos - pos0)**2).sum()), qM, '\n')

# plot spherical obstacles
for o in obstacles:
    u = np.linspace(0, 2 * np.pi, 10)
    v = np.linspace(0, np.pi, 10)
    x = o[3] * np.outer(np.cos(u), np.sin(v)) + o[0]
    y = o[3] * np.outer(np.sin(u), np.sin(v)) + o[1]
    z = o[3] * np.outer(np.ones(np.size(u)), np.cos(v)) + o[2]
    ax.plot_surface(x, y, z, color='b')

# plot goal
ax.scatter(target[0], target[1], target[2], c='r')
ax.legend()
ax.set_xlim(-6, 6)
ax.set_ylim(-6, 6)
ax.set_zlim(-6, 6)
plt.show()
def softmax(self, w):
    # softmax (logistic) cost; expects labels y in {-1, +1}
    cost = np.sum(np.log(1 + np.exp(-self.y * self.model(self.x, w))))
    return cost / float(np.size(self.y))
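# NOTE: a minimal worked example of evaluating the softmax cost above by hand
# on hypothetical data, assuming the same linear-model convention
# model(x, w) = w[0] + w[1]*x used in the least-squares example.
import numpy as np

x = np.array([-1.0, 0.5, 2.0])
y = np.array([-1.0, 1.0, 1.0])
w = np.array([0.0, 1.0])
cost = np.sum(np.log(1 + np.exp(-y * (w[0] + w[1] * x)))) / float(np.size(y))
print(cost)  # roughly 0.30 for this toy data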
# AdamOptimizer arguments: (data, targets, w1, w2, loss_func, tuning_params),
# where tuning_params packs (mn, vn, b1, b2, batch_size, learning_rate, iterations)
adam = AdamOptimizer(train_x, train_y_exact, w1, w2, loss_func, tuning_params)  # create optimizer object
adam.find_weights()                                 # train
final_w1, final_w2 = adam.get_weights()             # get trained weights

test_data_x = get_test_data(all_data)               # get test data
test_data_x = normalize(test_data_x)                # standardize data
test_data_desired = get_test_data_result(all_data)  # exact outputs for test data

# run forward pass using the trained weights (only w1 and w2 are trained above)
test_data_predict = forward_pass(test_data_x, final_w1, final_w2)

# threshold network outputs into hard 0/1 predictions
test_data_size = np.size(test_data_predict, 0)
for i in range(test_data_size):
    test_data_predict[i] = 0 if test_data_predict[i] < 0.5 else 1

# evaluate accuracy via the confusion matrix
confuse = confusion_matrix(test_data_desired, test_data_predict)
accuracy = confuse.trace() / confuse.sum()
print(accuracy)

# plot the training loss history
plt.figure(1, figsize=(15, 6))
plt.subplot(1, 2, 1)
x_axis = np.arange(np.size(adam.loss_array, 0))
plt.plot(x_axis, adam.loss_array, linewidth=3.0, label='loss')
plt.legend()
def seir(self, y, t, parameters, controls, stochastic=False):
    # right-hand side of the ODE system given state y, time t, parameters, and controls
    if self.number_group > 1:
        y = y.reshape((10, self.number_group))
    S, E, Q, A, I, H, R, D, Tc, Tu = y

    # q, tau, HFR, kappa, beta, delta, sigma, eta_I, eta_Q, mu, gamma_I, gamma_A, gamma_H, gamma_Q = parameters
    # _, _, _, delta, sigma, eta_I, eta_Q, mu, gamma_I, gamma_A, gamma_H, gamma_Q = parameters
    alpha, q, tau, HFR, kappa, beta, delta, sigma, eta_I, eta_Q, mu, gamma_I, gamma_A, gamma_H, gamma_Q = controls
    # alpha, q, tau, HFR, kappa, beta, _, _, _, _, _, _, _, _, _ = controls
    # alpha = (np.tanh(alpha) + 1)/2

    # interpolate the time-dependent control alpha at time t
    alpha = self.interpolation(t, self.t_control, alpha)
    # alpha, q, tau, HFR, kappa = [self.interpolation(t, self.t_control, controls[i])
    #                              for i in range(self.number_time_dependent_controls)]
    # tau_p = self.interpolation(t - np.max(1./sigma), self.t_control, controls[2])
    # tau_p = self.interpolation(t, self.t_control, controls[2])
    # tau_p = tau
    # IHR = np.divide(kappa, np.max(kappa) + tau_p)
    IHR = kappa
    QHR = ewm(tau, IHR)
    # gamma_A = gamma_I

    # convert proportions into branching factors
    pi = self.proportion2factor(IHR, eta_I, gamma_I)
    nu = self.proportion2factor(HFR, mu, gamma_H)
    rho = self.proportion2factor(QHR, eta_Q, gamma_Q)

    contact = self.contact_rate(t)

    # transmission modifiers are currently disabled (fixed at 1)
    # theta_I = 2 - tau
    # theta_A = 1 - tau
    theta_I = 1. - 0 * tau
    theta_A = 1. - 0 * tau
    delta = 1. + 0 * delta

    # contact terms driving exposure (C_E) and quarantine (C_Q)
    C_E = ewm(1 - alpha,
              ewm(1 - q, ewm(delta, np.dot(contact, ewm(theta_I, np.divide(I, self.N_total)))))
              + np.dot(contact, ewm(theta_A, np.divide(A, self.N_total))))
    C_Q = ewm(1 - alpha,
              ewm(q, ewm(delta, np.dot(contact, ewm(theta_I, np.divide(I, self.N_total))))))

    if stochastic:
        # clip compartments at zero before drawing Poisson transition counts
        zeros = np.zeros(np.size(S))
        S = np.max([zeros, S], axis=0)
        E = np.max([zeros, E], axis=0)
        Q = np.max([zeros, Q], axis=0)
        A = np.max([zeros, A], axis=0)
        I = np.max([zeros, I], axis=0)
        H = np.max([zeros, H], axis=0)

    # transition rates between compartments
    P1 = ewm(beta, ewm(C_E, S))
    P2 = ewm(beta, ewm(C_Q, S))
    P3 = ewm(tau, ewm(sigma, E))
    P4 = ewm(1 - tau, ewm(sigma, E))
    P5 = ewm(rho, ewm(eta_Q, Q))
    P6 = ewm(1 - rho, ewm(gamma_Q, Q))
    P7 = ewm(gamma_A, A)
    P8 = ewm(pi, ewm(eta_I, I))
    P9 = ewm(1 - pi, ewm(gamma_I, I))
    P10 = ewm(nu, ewm(mu, H))
    P11 = ewm(1 - nu, ewm(gamma_H, H))

    if stochastic:
        # replace deterministic rates with Poisson draws
        P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11 = [
            np.random.poisson(P) for P in (P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11)]

    # assemble the derivative of each compartment
    dS = -P1 - P2
    dE = P1 - P3 - P4
    dQ = P2 - P5 - P6
    dA = P4 - P7
    dI = P3 - P8 - P9
    dH = P8 + P5 - P10 - P11
    dR = P7 + P9 + P11 + P6
    dD = P10
    dTc = P3 + P2   # total confirmed: symptomatic plus quarantined (P2)
    dTu = P4        # total unconfirmed
    dydt = np.array([dS, dE, dQ, dA, dI, dH, dR, dD, dTc, dTu]).flatten("C")
    return dydt
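# NOTE: the seir right-hand side above relies on an ewm helper that is not
# defined in this file. From its usage (combining equally shaped vectors term
# by term) it appears to be element-wise multiplication; a minimal sketch
# under that assumption:
import numpy as np

def ewm(a, b):
    # element-wise multiply two arrays (or array/scalar pairs)
    return np.multiply(a, b)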
def get_test_data(data):
    # get all input data for testing from the csv file:
    # the final third of the rows, all columns except the label column
    n = np.size(data, 0)
    n = int(np.round(2 * n / 3))
    test_data = data[n:, :-1]
    return test_data
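# NOTE: get_test_data reserves the final third of the rows for testing. A
# hypothetical companion for the training portion, mirroring the same 2/3
# split (not part of the original file), might look like:
import numpy as np

def get_train_data(data):
    # first two thirds of the rows, all columns except the label column
    n = int(np.round(2 * np.size(data, 0) / 3))
    return data[:n, :-1]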
def point_and_projection(self, point1, point2):
    # generate range for viewing limits
    minx = min(min(self.x[:, 0]), min(self.x[:, 1]))
    maxx = max(max(self.x[:, 0]), max(self.x[:, 1]))
    gapx = (maxx - minx) * 0.1
    minx -= gapx
    maxx += gapx

    # initialize figure
    fig = plt.figure(figsize=(8, 4))
    gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1])
    ax = plt.subplot(gs[0], aspect='equal')
    ax2 = plt.subplot(gs[1], aspect='equal')

    # each panel shows the data, separators, region coloring, and the
    # projections of one input point (same logic, so loop over both panels)
    for axis, pt in [(ax, point1), (ax2, point2)]:
        # plot data and separators
        self.plot_data(axis)
        self.plot_all_separators(axis)

        # determine which classifiers evaluate the point positively
        point = np.asarray([1] + pt)
        point.shape = (len(point), 1)
        y = np.dot(self.W, point)
        ind = np.argwhere(y > 0)
        if np.size(ind) == 0:
            num_classes = len(np.unique(self.y))
            ind = np.arange(num_classes).tolist()
        else:
            ind = [v[0] for v in ind]
        point = point[1:]
        axis.scatter(point[0], point[1], c='k', edgecolor='w', linewidth=1, s=90)

        # loop over those classifiers and project the point onto each boundary
        for i in ind:
            # get weights
            w = np.asarray(self.W[i])
            w.shape = (len(w), 1)
            w_norm = sum([v**2 for v in w[1:]])

            # make projected point
            add_on = w[0] + sum([v * a for v, a in zip(point, w[1:])])
            add_on /= w_norm
            proj_point = copy.deepcopy(point)
            proj_point -= add_on * w[1:]

            # projected point
            axis.scatter(proj_point[0], proj_point[1], c=self.colors[i], edgecolor='k',
                         linewidth=1, s=60, zorder=4, marker='X')

            # dashed connector between the point and its projection
            l = np.linspace(proj_point[0], point[0], 200)
            b = np.linspace(proj_point[1], point[1], 200)
            axis.plot(l, b, linewidth=1, linestyle='--', color='k', zorder=3)

        # dress panel
        axis.set_xlim(minx, maxx)
        axis.set_ylim(minx, maxx)
        axis.axis('off')
def train(x, y, feature_transforms, **kwargs):
    # get and run optimizer to solve the one-versus-all two-class subproblems
    N = np.shape(x)[0]
    C = np.size(np.unique(y))
    max_its = 100
    alpha_choice = 1
    cost_name = 'softmax'
    normalize = 'standard'
    w = 0.1 * np.random.randn(N + 1, 1)

    # switches for user choices
    if 'max_its' in kwargs:
        max_its = kwargs['max_its']
    if 'alpha_choice' in kwargs:
        alpha_choice = kwargs['alpha_choice']
    if 'cost_name' in kwargs:
        cost_name = kwargs['cost_name']
    if 'w' in kwargs:
        w = kwargs['w']
    if 'normalize' in kwargs:
        normalize = kwargs['normalize']

    # loop over subproblems and solve
    weight_histories = []
    for c in range(0, C):
        # prepare temporary C vs notC sub-problem labels
        y_temp = copy.deepcopy(y)
        ind = np.argwhere(y_temp.astype(int) == c)[:, 0]
        ind2 = np.argwhere(y_temp.astype(int) != c)[:, 0]
        y_temp[ind] = 1
        y_temp[ind2] = -1

        # run on normalized data
        run = basic_runner.Setup(x, y_temp, feature_transforms, cost_name, normalize=normalize)
        run.fit(w=w, alpha_choice=alpha_choice, max_its=max_its)

        # store each weight history
        weight_histories.append(run.weight_history)

    # combine the individual classifier weights into a single weight matrix per step
    R = len(weight_histories[0])
    combined_weights = []
    for r in range(R):
        a = np.array([weight_histories[c][r] for c in range(C)]).T
        a = a[0, :, :]
        combined_weights.append(a)

    # run combined weight matrices through the fusion rule to count
    # misclassifications per step
    counter = basic_runner.Setup(x, y, feature_transforms, 'multiclass_counter',
                                 normalize=normalize).cost_func
    count_history = [counter(v) for v in combined_weights]
    return combined_weights, count_history
def solve_2class_subproblems(self, **kwargs):
    # parse args
    max_its = 5
    if 'max_its' in kwargs:
        max_its = kwargs['max_its']
    alpha = 10**-3
    if 'alpha' in kwargs:
        alpha = kwargs['alpha']
    steplength_rule = 'none'
    if 'steplength_rule' in kwargs:
        steplength_rule = kwargs['steplength_rule']
    version = 'unnormalized'
    if 'version' in kwargs:
        version = kwargs['version']
    algo = 'newtons_method'
    if 'algo' in kwargs:
        algo = kwargs['algo']

    #### perform all optimizations ####
    self.g = self.softmax
    if 'cost' in kwargs:
        cost = kwargs['cost']
        if cost == 'softmax':
            self.g = self.softmax
        if cost == 'relu':
            self.g = self.relu

    # loop over subproblems and solve
    self.W = []
    num_classes = np.size(np.unique(self.y))
    for i in range(0, num_classes):
        # prepare temporary C vs notC sub-problem labels
        self.y_temp = copy.deepcopy(self.y)
        ind = np.argwhere(self.y_temp == i)[:, 0]
        ind2 = np.argwhere(self.y_temp != i)[:, 0]
        self.y_temp[ind] = 1
        self.y_temp[ind2] = -1

        # solve the current subproblem
        if algo == 'gradient_descent':
            w_hist = self.opt.gradient_descent(g=self.g,
                                               w=np.random.randn(np.shape(self.x)[1] + 1, 1),
                                               version=version, max_its=max_its, alpha=alpha,
                                               steplength_rule=steplength_rule)
        elif algo == 'newtons_method':
            w_hist = self.opt.newtons_method(g=self.g,
                                             w=np.random.randn(np.shape(self.x)[1] + 1, 1),
                                             max_its=max_its, epsilon=10**(-5))

        # store best weight for final classification
        g_count = [self.g(w) for w in w_hist]
        ind = np.argmin(g_count)
        w = w_hist[ind]

        # normalize normal vectors for each classifier
        w_norm = sum([v**2 for v in w[1:]])**0.5
        w_1N = [v / w_norm for v in w]
        self.W.append(w_1N)

    # reshape
    self.W = np.asarray(self.W)
    self.W.shape = (num_classes, np.shape(self.x)[1] + 1)
def static_fig(self, w_hist, **kwargs):
    self.w_hist = w_hist
    ind = -1
    show_path = True
    if np.size(w_hist) == 0:
        show_path = False
    w = 0
    if show_path:
        w = w_hist[ind]

    ##### setup figure to plot #####
    # initialize figure
    fig = plt.figure(figsize=(8, 3))
    artist = fig

    # create subplot with two panels: the data fit on the left, contour plot on the right
    gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1])
    ax1 = plt.subplot(gs[0])
    ax2 = plt.subplot(gs[1])

    # produce color scheme
    s = np.linspace(0, 1, len(self.w_hist[:round(len(self.w_hist) / 2)]))
    s.shape = (len(s), 1)
    t = np.ones(len(self.w_hist[round(len(self.w_hist) / 2):]))
    t.shape = (len(t), 1)
    s = np.vstack((s, t))
    self.colorspec = np.concatenate((s, np.flipud(s)), 1)
    self.colorspec = np.concatenate((self.colorspec, np.zeros((len(s), 1))), 1)

    # seed left panel plotting range
    xmin = copy.deepcopy(min(self.x))
    xmax = copy.deepcopy(max(self.x))
    xgap = (xmax - xmin) * 0.1
    xmin -= xgap
    xmax += xgap
    x_fit = np.linspace(xmin, xmax, 300)

    # seed right panel contour plot
    viewmax = 3
    if 'viewmax' in kwargs:
        viewmax = kwargs['viewmax']
    view = [20, 100]
    if 'view' in kwargs:
        view = kwargs['view']
    num_contours = 15
    if 'num_contours' in kwargs:
        num_contours = kwargs['num_contours']

    ### contour plot in right panel ###
    self.contour_plot(ax2, viewmax, num_contours)

    ### make left panel - plot data and fit ###
    # scatter data
    self.scatter_pts(ax1)

    if show_path:
        # initialize and plot fit to data
        y_fit = np.tanh(w[0] + x_fit * w[1])
        color = self.colorspec[-1]
        ax1.plot(x_fit, y_fit, color=color, linewidth=2)

    # add points to right panel contour plot
    num_frames = len(self.w_hist)
    for k in range(num_frames):
        # current color and weights
        color = self.colorspec[k]
        w = self.w_hist[k]

        ###### make right panel - plot contour and steps ######
        if k == 0:
            ax2.scatter(w[0], w[1], s=90, facecolor=color, edgecolor='k', linewidth=0.5, zorder=3)
        if k > 0 and k < num_frames:
            self.plot_pts_on_contour(ax2, k, color)
        if k == num_frames - 1:
            ax2.scatter(w[0], w[1], s=90, facecolor=color, edgecolor='k', linewidth=0.5, zorder=3)

    plt.show()
def conv_layer_testing(self, tensor, kernels, stats):
    # square up tensor into a tensor of square images
    side = int(np.shape(tensor)[1]**0.5)
    tensor = tensor.reshape(np.shape(tensor)[0], side, side, order='F')

    # pad tensor
    kernel = kernels[0]
    padded_tensor = self.pad_tensor(tensor, kernel)

    # window tensor
    wind_tensor = self.sliding_window_tensor(padded_tensor, kernel, stride=1)

    # normalize windows since they touch weights
    if np.size(stats) == 0:
        a_means = np.mean(wind_tensor, axis=0)
        a_stds = np.std(wind_tensor, axis=0)
        stats = [a_means, a_stds]
    else:
        # reuse the normalization statistics computed at training time;
        # stats holds [means, stds] as stored above
        a_means = stats[0]
        a_stds = stats[1]
    wind_tensor = self.normalize(wind_tensor, a_means, a_stds)

    #### compute convolution feature maps / downsample via pooling, one map at a time over the entire tensor ####
    kernel2 = np.ones((6, 6))
    stride = 3
    new_tensors = []
    for kernel in kernels:
        # make convolution feature map via matrix multiplication over windowed tensor
        feature_map = np.dot(wind_tensor, kernel.flatten()[:, np.newaxis])

        # reshape convolution feature map into array
        feature_map.shape = np.shape(tensor)
        feature_map = np.asarray(feature_map)

        # shove result through nonlinear activation
        feature_map = self.activation(feature_map)

        # pool / downsample feature map: first window it, then max over each window
        wind_featmap = self.sliding_window_tensor(feature_map, kernel2, stride=stride)
        max_pool = np.max(wind_featmap, axis=1)

        # reshape into new downsampled pooled feature map
        pool_side = int((np.shape(max_pool)[0] / float(np.shape(tensor)[0]))**0.5)
        max_pool.shape = (np.shape(tensor)[0], pool_side, pool_side)
        new_tensors.append(max_pool)

    # turn list into array
    new_tensors = np.asarray(new_tensors)

    # reshape into final feature vector to touch fully connected layer(s)
    new_tensors = new_tensors.swapaxes(0, 1)
    new_tensors = np.reshape(new_tensors,
                             (np.shape(new_tensors)[0], np.shape(new_tensors)[1],
                              np.shape(new_tensors)[2] * np.shape(new_tensors)[3]))
    new_tensors = np.reshape(new_tensors,
                             (np.shape(new_tensors)[0],
                              np.shape(new_tensors)[1] * np.shape(new_tensors)[2]),
                             order='F')
    return new_tensors, stats
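# NOTE: conv_layer_testing depends on pad_tensor and sliding_window_tensor
# helpers defined elsewhere. A minimal, self-contained sketch of the core
# window-then-max-pool idea on a single 2D image, with a hypothetical window
# size and stride (this is not the original helper code):
import numpy as np

def max_pool_2d(image, win=2, stride=2):
    # slide a win x win window over the image and keep the max of each patch
    H, W = image.shape
    return np.array([[image[i:i+win, j:j+win].max()
                      for j in range(0, W - win + 1, stride)]
                     for i in range(0, H - win + 1, stride)])

img = np.arange(16.0).reshape(4, 4)
print(max_pool_2d(img))  # [[ 5.  7.] [13. 15.]]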
import autograd.numpy as np
from autograd import value_and_grad, hessian
from autograd.misc.flatten import flatten_func

def newtons_method(g, max_its, w, **kwargs):
    # flatten input function, in case it takes in matrices of weights
    flat_g, unflatten, w = flatten_func(g, w)

    # compute the gradient / hessian functions of our input function -
    # note these are themselves functions. In particular the gradient -
    # when evaluated - returns both the gradient and the function evaluation (remember,
    # as discussed in Chapter 3, we always get the function evaluation 'for free' when we use
    # an automatic differentiator to evaluate the gradient)
    gradient = value_and_grad(flat_g)
    hess = hessian(flat_g)

    # set numerical stability / regularization parameter
    epsilon = 10**(-7)
    if 'epsilon' in kwargs:
        epsilon = kwargs['epsilon']

    # run the newton's method loop
    weight_history = []  # container for weight history
    cost_history = []    # container for corresponding cost function history
    for k in range(max_its):
        # evaluate the gradient, store current weights and cost function value
        cost_eval, grad_eval = gradient(w)
        weight_history.append(unflatten(w))
        cost_history.append(cost_eval)

        # evaluate the hessian
        hess_eval = hess(w)

        # reshape for numpy linalg functionality
        n = int(np.size(hess_eval)**0.5)
        hess_eval.shape = (n, n)

        # solve the second-order system for the weight update
        w = w - np.dot(np.linalg.pinv(hess_eval + epsilon * np.eye(np.size(w))), grad_eval)

    # collect final weights
    weight_history.append(unflatten(w))

    # compute final cost function value via g itself (since we aren't computing
    # the gradient at the final step we don't get the final cost function value
    # via the automatic differentiator)
    cost_history.append(flat_g(w))
    return weight_history, cost_history


# gradient descent function - inputs: g (input function), alpha_choice (steplength
# parameter or 'diminishing'), max_its (maximum number of iterations), w (initialization)
def gradient_descent(g, alpha_choice, max_its, w):
    # compute the gradient function of our input function - note this is a function too
    # that - when evaluated - returns both the gradient and the function evaluation (remember,
    # as discussed in Chapter 3, we always get the function evaluation 'for free' when we use
    # an automatic differentiator to evaluate the gradient)
    gradient = value_and_grad(g)

    # run the gradient descent loop
    weight_history = []  # container for weight history
    cost_history = []    # container for corresponding cost function history
    alpha = 0
    for k in range(1, max_its + 1):
        # check if the diminishing steplength rule is used
        if alpha_choice == 'diminishing':
            alpha = 1 / float(k)
        else:
            alpha = alpha_choice

        # evaluate the gradient, store current weights and cost function value
        cost_eval, grad_eval = gradient(w)
        weight_history.append(w)
        cost_history.append(cost_eval)

        # take gradient descent step
        w = w - alpha * grad_eval

    # collect final weights
    weight_history.append(w)

    # compute final cost function value via g itself (since we aren't computing
    # the gradient at the final step we don't get the final cost function value
    # via the automatic differentiator)
    cost_history.append(g(w))
    return weight_history, cost_history
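# NOTE: a minimal usage sketch of the gradient_descent function above on a
# toy convex quadratic; the cost g and its initialization are hypothetical,
# purely for illustration.
import autograd.numpy as np

def g(w):
    # simple convex quadratic with minimum at the origin
    return np.sum(w**2)

w0 = np.array([[3.0], [-2.0]])
weight_history, cost_history = gradient_descent(g, alpha_choice=0.1, max_its=50, w=w0)
print(cost_history[0], cost_history[-1])  # cost shrinks from 13.0 toward zero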