Example #1
    def vjp_all(g):
        
        vjp_y = g[-1, :]
        vjp_t0 = 0
        time_vjp_list = []
        vjp_args = np.zeros(np.size(flat_args))
        
        for i in range(T - 1, 0, -1):

            # Compute effect of moving measurement time.
            vjp_cur_t = np.dot(func(yt[i, :], t[i], *func_args), g[i, :])
            time_vjp_list.append(vjp_cur_t)
            vjp_t0 = vjp_t0 - vjp_cur_t

            # Run augmented system backwards to the previous observation.
            aug_y0 = np.hstack((yt[i, :], vjp_y, vjp_t0, vjp_args))
            aug_ans = odeint(augmented_dynamics, aug_y0,
                             np.array([t[i], t[i - 1]]), tuple((flat_args,)), **kwargs)
            _, vjp_y, vjp_t0, vjp_args = unpack(aug_ans[1])

            # Add gradient from current output.
            vjp_y = vjp_y + g[i - 1, :]

        time_vjp_list.append(vjp_t0)
        vjp_times = np.hstack(time_vjp_list)[::-1]

        return None, vjp_y, vjp_times, unflatten(vjp_args)
Example #2
    def auto_diff(self, params, unknowns, resids):
        plist = params.keys()
        ulist = unknowns.keys()

        def wrap(inputs):
            _unknowns = {i : unknowns[i] for i in unknowns}
            _resids = {}
            _params = dict(zip(plist, inputs))
            self.solve_nonlinear(_params, _unknowns, resids)
            return [_unknowns[unk] for unk in ulist]

        gradfunc = jacobian(wrap)
        inp = [params[val] for val in plist]
        df = gradfunc(inp)

        J = {}
        for i, unk in enumerate(ulist):
            for k, inp in enumerate(plist):
                gshape = (np.size(unknowns[unk]),
                          np.size(params[inp]))
                J[unk, inp] = np.reshape(df[i][k], gshape, order="C")
        return J
Example #3
    def region_coloring(self,region,ax):
        #### color first regions  ####
        # generate input range for functions
        minx = min(min(self.x[:,0]),min(self.x[:,1]))
        maxx = max(max(self.x[:,0]),max(self.x[:,1]))
        gapx = (maxx - minx)*0.1
        minx -= gapx
        maxx += gapx
        
        # plot over range
        r = np.linspace(minx,maxx,200)
        x1_vals,x2_vals = np.meshgrid(r,r)
        x1_vals.shape = (len(r)**2,1)
        x2_vals.shape = (len(r)**2,1)
        o = np.ones((len(r)**2,1))
        x = np.concatenate([o,x1_vals,x2_vals],axis = 1)
        
        ### for region 1, determine points that are uniquely positive for each classifier ###
        ind_set = []
        y = np.dot(self.W,x.T)
        num_classes = np.size(np.unique(self.y))
        
        if region == 1 or region == 'all':
            for i in range(0,num_classes):       
                class_inds = np.arange(num_classes)
                class_inds = np.delete(class_inds,(i),axis = 0)

                # loop over non-current classifier
                ind = np.argwhere(y[class_inds[0]] < 0).tolist()
                ind = [s[0] for s in ind]
                for j in range(1,len(class_inds)):
                    c_ind = class_inds[j]
                    ind2 = np.argwhere(y[c_ind] < 0).tolist()
                    ind2 = [s[0] for s in ind2]
                    ind = [s for s in ind if s in ind2]                

                ind2 = np.argwhere(y[i] > 0).tolist()
                ind2 = [s[0] for s in ind2]
                ind = [s for s in ind if s in ind2]

                # plot polygon over region defined by ind
                x1_ins = np.asarray([x1_vals[s] for s in ind])
                x1_ins.shape = (len(x1_ins),1)
                x2_ins = np.asarray([x2_vals[s] for s in ind])
                x2_ins.shape = (len(x2_ins),1)
                h = np.concatenate((x1_ins,x2_ins),axis = 1)
                vertices = ConvexHull(h).vertices
                poly = [h[v] for v in vertices]
                polygon = Polygon(poly, True)   
                patches = []
                patches.append(polygon)

                p = PatchCollection(patches, alpha=0.2,color = self.colors[i])
                ax.add_collection(p)
                
        if region == 2 or region == 'all':
            for i in range(0,num_classes):       
                class_inds = np.arange(num_classes)
                class_inds = np.delete(class_inds,(i),axis = 0)

                # loop over non-current classifier
                ind = np.argwhere(y[class_inds[0]] > 0).tolist()
                ind = [s[0] for s in ind]
                for j in range(1,len(class_inds)):
                    c_ind = class_inds[j]
                    ind2 = np.argwhere(y[c_ind] > 0).tolist()
                    ind2 = [s[0] for s in ind2]
                    ind = [s for s in ind if s in ind2]                

                ind2 = np.argwhere(y[i] < 0).tolist()
                ind2 = [s[0] for s in ind2]
                ind = [s for s in ind if s in ind2]

                # plot polygon over region defined by ind
                x1_ins = np.asarray([x1_vals[s] for s in ind])
                x1_ins.shape = (len(x1_ins),1)
                x2_ins = np.asarray([x2_vals[s] for s in ind])
                x2_ins.shape = (len(x2_ins),1)
                o = np.ones((len(x2_ins),1))
                h = np.concatenate((o,x1_ins,x2_ins),axis = 1)
                
                # determine regions dominated by one classifier or the other
                vals = []
                for c in class_inds:
                    w = self.W[int(c)]
                    nv = np.dot(w,h.T)
                    vals.append(nv)
                vals = np.asarray(vals)
                vals.shape = (len(class_inds),len(h))
                ind = np.argmax(vals,axis = 0)

                for j in range(len(class_inds)):
                    # make polygon for each subregion
                    ind1 = np.argwhere(ind == j)
                    x1_ins2 = np.asarray([x1_ins[s] for s in ind1])
                    x1_ins2.shape = (len(x1_ins2),1)
                    x2_ins2 = np.asarray([x2_ins[s] for s in ind1])
                    x2_ins2.shape = (len(x2_ins2),1)
                    h = np.concatenate((x1_ins2,x2_ins2),axis = 1)
                    
                    # find convex hull of points
                    vertices = ConvexHull(h).vertices
                    poly = [h[v] for v in vertices]
                    polygon = Polygon(poly, True)   
                    patches = []
                    patches.append(polygon)
                    c = class_inds[j]
                    p = PatchCollection(patches, alpha=0.2,color = self.colors[c])
                    ax.add_collection(p)
                    
        if region == 3 or region == 'all':
            # find negative zone of all classifiers
            ind = np.argwhere(y[0] < 0).tolist()
            ind = [s[0] for s in ind]
            for i in range(1,num_classes):
                ind2 = np.argwhere(y[i] < 0).tolist()
                ind2 = [s[0] for s in ind2]
                ind = [s for s in ind if s in ind2]                

            # loop over negative zone, find max area of each classifier
            x1_ins = np.asarray([x1_vals[s] for s in ind])
            x1_ins.shape = (len(x1_ins),1)
            x2_ins = np.asarray([x2_vals[s] for s in ind])
            x2_ins.shape = (len(x2_ins),1)
            o = np.ones((len(x2_ins),1))
            h = np.concatenate((o,x1_ins,x2_ins),axis = 1)
                
            # determine regions dominated by one classifier or the other
            vals = []
            for c in range(num_classes):
                w = self.W[c]
                nv = np.dot(w,h.T)
                vals.append(nv)
            vals = np.asarray(vals)
            vals.shape = (num_classes,len(h))
            ind = np.argmax(vals,axis = 0)

            # loop over each class, construct polygon region for each
            for c in range(num_classes):
                # make polygon for each subregion
                ind1 = np.argwhere(ind == c)
                x1_ins2 = np.asarray([x1_ins[s] for s in ind1])
                x1_ins2.shape = (len(x1_ins2),1)
                x2_ins2 = np.asarray([x2_ins[s] for s in ind1])
                x2_ins2.shape = (len(x2_ins2),1)
                h = np.concatenate((x1_ins2,x2_ins2),axis = 1)
                    
                # find convex hull of points
                vertices = ConvexHull(h).vertices
                poly = [h[v] for v in vertices]
                polygon = Polygon(poly, True)   
                patches = []
                patches.append(polygon)
                p = PatchCollection(patches, alpha=0.2,color = self.colors[c])
                ax.add_collection(p)    
Example #4
def fixed_points(rnn,
                 inp,
                 num_points=1,
                 eps=0.01,
                 opt_iters=10000,
                 thresh=1,
                 max_tries=100,
                 rand_init=1,
                 init_scale=5,
                 plot_loss=0):
    '''This function uses the trained parameters to find num_points fixed points. It does gradient
    descent to minimize q(x), which is analogous to the energy of the system. To just plot the gradient-descent loss
    and step size for finding a single fixed point, set the plot_loss flag to 1.
    Inputs:
        rnn: Should be a JazNet class object.
        inp: A fixed value for the input(s). Can just be a list (e.g. [1,0])
        num_points: Number of points to find (if plot_loss=0)
        eps: Epsilon value that scales the step size
        opt_iters: How many iterations to run to try to converge on a fixed point
        thresh: Threshold on the loss q(x) below which a candidate counts as a fixed point
        max_tries: How many restarts to attempt per fixed point before giving up
        rand_init: Randomly pick a starting point if 1 (default), otherwise go with the network's current activity.
        init_scale: Scale of the random initial activity used when rand_init=1
        plot_loss: Will result in only finding one fixed point. Shows how loss function/step size changes. Default 0

    Outputs:
        all_points: Gives activity for all fixed points found in a num_points-by-N array
        fp_outputs: Network output at each fixed point. Note: Should change this depending on
            whether network uses tanh of activities for outputs, or if it has biases.
        trajectories: List with num_points elements, where each element is a TxN array, where T is the number of
            steps it took to find the fixed point and N is the number of neurons.
        '''
    def output(x):
        return np.dot(np.tanh(x), rnn_par['out_weights'])

    def F(x):
        return (-x + np.dot(np.tanh(x), rnn_par['rec_weights']) +
                np.dot(inp, rnn_par['inp_weights']) + rnn_par['bias'])

    def q(x):
        return 1 / 2 * np.linalg.norm(F(x))**2

    def find_point(inp, opt_iters, eps):
        loss = []
        stepsize = []
        x_traj = []
        if rand_init:
            x = np.random.randn(
                rnn.act.size
            ) * init_scale  # The randomized initial activity needs to be big enough to relax to interesting points
        else:
            x = np.squeeze(rnn.act)
        for i in range(opt_iters):
            loss.append(q(x))
            if loss[i] < thresh:
                break
            step = eps * loss_grad(x)
            stepsize.append(np.linalg.norm(step))
            x = x - step
            x_traj.append(x)
        return x, loss, stepsize, x_traj

    start = time.time()
    rnn_par = rnn.rnn_par  # Extract the parameters
    loss_grad = grad(q)

    if plot_loss:  # To see the optimization process to find one fixed point
        x, loss, stepsize, x_traj = find_point(inp, opt_iters, eps)
        plt.figure()
        plt.subplot(1, 3, 1)
        plt.plot(loss[-100:-1])
        plt.title('Loss, last 100')
        plt.subplot(1, 3, 2)
        plt.plot(loss)
        plt.xlabel('Iteration')
        plt.title('Loss, all')
        plt.subplot(1, 3, 3)
        plt.plot(stepsize)
        plt.xlabel('Iteration')
        plt.title('Step size')
        plt.show()
        print('Last loss:', loss[-1])
    else:  # For finding a bunch of fixed points
        all_points = np.zeros((num_points, np.size(rnn.act)))
        fp_outputs = np.zeros((num_points, rnn_par['out_weights'].shape[1]))
        trajectories = []
        for p in range(num_points):
            endloss = 1000  # Some big value above the threshold
            tries = 0
            while endloss > thresh:
                if tries < max_tries:
                    x, loss, stepsize, x_traj = find_point(inp, opt_iters, eps)
                    endloss = loss[-1]
                    tries += 1
                else:
                    print('Unsuccessful run; error=%g' % endloss)
                    raise TimeoutError('No fixed points found in %d tries' %
                                       max_tries)
            all_points[p, :] = x
            fp_outputs[p] = output(x)
            trajectories.append(np.array(x_traj))
            print('.', end="")
        finish = time.time()
        print('Done with fixed points in %d seconds' % (finish - start))
        return all_points, fp_outputs, trajectories
Example #5
 def relu(self,w):
     cost = np.sum(np.maximum(0,-self.y*self.model(self.x,w)))
     return cost/float(np.size(self.y))
Example #6
 def least_absolute_deviations(self,w):
     cost = np.sum(np.abs(self.model(self.x,w) - self.y))
     return cost/float(np.size(self.y))
Example #7
def NN_diffusion(nx, nt, iterations, num_hidden_neurons, learning_rate):
    tf.reset_default_graph()

    #set a seed to get the same results from every run
    tf.set_random_seed(4155)

    x_ = np.linspace(0, 1, nx)
    t_ = np.linspace(0, 1, nt)

    X, T = np.meshgrid(x_, t_)

    x = X.ravel()
    t = T.ravel()

    #Construct Neural network
    zeros = tf.reshape(tf.convert_to_tensor(np.zeros(x.shape)), shape=(-1, 1))
    x = tf.reshape(tf.convert_to_tensor(x), shape=(-1, 1))
    t = tf.reshape(tf.convert_to_tensor(t), shape=(-1, 1))

    total_points = tf.concat([x, t], 1)  #input layer

    #number of hidden layers
    num_hidden_layers = len(num_hidden_neurons)
    #print('hidden layers:',num_hidden_layers)

    X = tf.convert_to_tensor(X)
    T = tf.convert_to_tensor(T)

    #construct the network
    #layer structures
    with tf.name_scope('dnn'):
        num_hidden_layers = np.size(num_hidden_neurons)
        previous_layer = total_points

        for l in range(num_hidden_layers):
            current_layer = tf.layers.dense(previous_layer,
                                            num_hidden_neurons[l],
                                            name=('hidden{}'.format(l + 1)),
                                            activation=tf.nn.sigmoid)

            previous_layer = current_layer

        dnn_output = tf.layers.dense(previous_layer,
                                     1,
                                     name='output',
                                     activation=None)

    #Define the cost function
    with tf.name_scope('loss'):

        g_trial = (1 - t) * u(x) + x * (1 - x) * t * dnn_output
        g_trial_d2x = tf.gradients(tf.gradients(g_trial, x), x)
        g_trial_dt = tf.gradients(g_trial, t)

        loss = tf.losses.mean_squared_error(zeros,
                                            g_trial_dt[0] - g_trial_d2x[0])

    #Defining optimizer
    with tf.name_scope('train'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        training_op = optimizer.minimize(loss)

    #Define a node that initializes all of the other nodes in the computational graph
    init = tf.global_variables_initializer()

    #g_analytic = tf.sin(np.pi*x)*tf.exp(-np.pi*np.pi*t)
    g_analytic = u_analytic(x, t)
    g_dnn = None

    start = time.time()

    #The execution phase
    with tf.Session() as sess:
        #initialize all variables in the graph
        init.run()

        #training of the network
        for i in range(iterations):
            sess.run(training_op)

        #store the results
        g_analytic = g_analytic.eval()  #analytic solution
        g_dnn = g_trial.eval()  #Neural network solution
        cost = loss.eval()  #cost evaluation
    stop = time.time()
    print('time duration:', stop - start)
    """
	#compare with analytical solution
	diff = np.abs(g_analytic - g_dnn)
	max_diff =np.max(diff)
	print('max absolute difference between the analytical and the tensorflow: ', max_diff)
	"""

    #statistical computations
    r2 = r2_score(g_analytic, g_dnn)
    mse = mean_squared_error(g_analytic, g_dnn)
    print('R2:', r2)
    print('MSE:', mse)

    G_analytic = g_analytic.reshape((nt, nx))
    G_dnn = g_dnn.reshape((nt, nx))

    #compare with analytical solution
    diff = np.abs(G_analytic - G_dnn)
    max_diff = np.max(diff)
    print(
        'max absolute difference between the analytical and the tensorflow: ',
        max_diff)

    X, T = np.meshgrid(x_, t_)

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')  # fig.gca(projection=...) was removed in Matplotlib >= 3.6
    ax.set_title(
        'Solution from the deep neural network with %d hidden layers \n and 50 neurons per hidden layer'
        % len(num_hidden_neurons))
    s = ax.plot_surface(X,
                        T,
                        G_dnn,
                        linewidth=0,
                        antialiased=False,
                        cmap=cm.viridis)
    ax.set_xlabel('Time $t$')
    ax.set_ylabel('Position $x$')
    #plt.savefig('solution_deep_nn_new.png')

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.set_title(
        'Analytical solution of diffusion equation with 4 hidden layers \n and 50 neurons per hidden layer'
    )
    s = ax.plot_surface(X,
                        T,
                        G_analytic,
                        linewidth=0,
                        antialiased=False,
                        cmap=cm.viridis)
    ax.set_xlabel('Time $t$')
    ax.set_ylabel('Position $x$')
    #plt.savefig('analytical_solution_nn_new.png')

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.set_title(
        'Difference between the numerical and analytical solution, \n with 4 hidden layers and 50 neurons per hidden layer'
    )
    s = ax.plot_surface(X,
                        T,
                        diff,
                        linewidth=0,
                        antialiased=False,
                        cmap=cm.viridis)
    ax.set_xlabel('Time $t$')
    ax.set_ylabel('Position $x$')
    #plt.savefig('difference_nn_new.png')

    plt.show()
    """
	# Take some slices of the 3D plots just to see the solutions at particular times
	indx1 = 0
	indx2 = int(nt/2)
	indx3 = nt-1

	t1 = t_[indx1]
	t2 = t_[indx2]
	t3 = t_[indx3]

	# Slice the results from the DNN
	res1 = g_dnn[:,indx1]
	res2 = g_dnn[:,indx2]
	res3 = g_dnn[:,indx3]

	# Slice the analytical results
	res_analytical1 = G_analytical[:,indx1]
	res_analytical2 = G_analytical[:,indx2]
	res_analytical3 = G_analytical[:,indx3]

	# Plot the slices
	plt.figure()
	plt.title("Computed solutions at time = %g"%t1)
	plt.plot(x_, res1)
	plt.plot(x_,res_analytical1)
	plt.legend(['dnn','analytical'])
	plt.savefig('computed_solution_nn_t1.png')

	plt.figure()
	plt.title("Computed solutions at time = %g"%t2)
	plt.plot(x_, res2)
	plt.plot(x_,res_analytical2)
	plt.legend(['dnn','analytical'])
	plt.savefig('computed_solution_nn_t2.png')

	plt.figure()
	plt.title("Computed solutions at time = %g"%t3)
	plt.plot(x_, res3)
	plt.plot(x_,res_analytical3)
	plt.legend(['dnn','analytical'])
	plt.savefig('computed_solution_nn_t3.png')

	plt.show()
	"""

    return diff
Example #8
    def plot_fit_and_feature_space(self,w,model,feat,**kwargs):        
        # construct figure
        fig, axs = plt.subplots(1, 3, figsize=(9,4))

        # create subplot with 2 panels
        gs = gridspec.GridSpec(1, 2, width_ratios=[1,1]) 
        ax1 = plt.subplot(gs[0]);
        ax2 = plt.subplot(gs[1]); 
        
        view = [20,20]
        if 'view' in kwargs:
            view = kwargs['view']

        ##### plot left panel in original space ####
        # scatter points
        xmin,xmax,ymin,ymax = self.scatter_pts_2d(self.x,ax1)

        # clean up panel
        ax1.set_xlim([xmin,xmax])
        ax1.set_ylim([ymin,ymax])

        # label axes
        ax1.set_xlabel(r'$x$', fontsize = 16)
        ax1.set_ylabel(r'$y$', rotation = 0,fontsize = 16,labelpad = 10)
        
        # create fit
        s = np.linspace(xmin,xmax,300)[np.newaxis,:]
 
        normalizer = lambda a: a
        if 'normalizer' in kwargs:
            normalizer = kwargs['normalizer']

        t = model(normalizer(s),w)
        
        ax1.plot(s.flatten(),t.flatten(),linewidth = 4,c = 'k',zorder = 0)    
        ax1.plot(s.flatten(),t.flatten(),linewidth = 2,c = 'lime',zorder = 0)

        #### plot fit in transformed feature space #####
        # check if feature transform has internal parameters
        x_transformed = 0
        sig = signature(feat)
        if len(sig.parameters) == 2:
            if np.shape(w)[1] == 1:
                x_transformed = feat(normalizer(self.x),w)
            else:
                x_transformed = feat(normalizer(self.x),w[0])
        else: 
            x_transformed = feat(normalizer(self.x))
        
        # two dimensional transformed feature space
        if x_transformed.shape[0] == 1:
            s = np.linspace(xmin,xmax,300)[np.newaxis,:]
            
            # scatter points
            xmin,xmax,ymin,ymax = self.scatter_pts_2d(x_transformed,ax2)
        
            # produce plot
            s2 = copy.deepcopy(s)
            if len(sig.parameters) == 2:
                if np.shape(w)[1] == 1:
                    s2 = feat(normalizer(s),w)
                else:
                    s2 = feat(normalizer(s),w[0])
            else: 
                s2 = feat(normalizer(s))
            t = model(normalizer(s),w)
            
            ax2.plot(s2.flatten(),t.flatten(),linewidth = 4,c = 'k',zorder = 0)    
            ax2.plot(s2.flatten(),t.flatten(),linewidth = 2,c = 'lime',zorder = 0)
            
            # label axes
            ax2.set_xlabel(r'$f\left(x,\mathbf{w}^{\star}\right)$', fontsize = 16)
            ax2.set_ylabel(r'$y$', rotation = 0,fontsize = 16,labelpad = 10)
            
        # three dimensional transformed feature space
        if x_transformed.shape[0] == 2:
            # create panel
            ax2 = plt.subplot(gs[1],projection = '3d');  
            s = np.linspace(xmin,xmax,100)[np.newaxis,:]

            # plot data in 3d
            xmin,xmax,xmin1,xmax1,ymin,ymax = self.scatter_3d_points(x_transformed,ax2)

            # create and plot fit
            s2 = copy.deepcopy(s)
            if len(sig.parameters) == 2:
                s2 = feat(normalizer(s),w[0])
            else: 
                s2 = feat(normalizer(s))
 
            # reshape for plotting
            a = s2[0,:]
            b = s2[1,:]
            a = np.linspace(xmin,xmax,100)
            b = np.linspace(xmin1,xmax1,100)
            a,b = np.meshgrid(a,b)
            
            # first transformed feature coordinate
            a.shape = (1,np.size(s)**2)
            f1 = feat(normalizer(a))[0,:]
            
            # second transformed feature coordinate
            b.shape = (1,np.size(s)**2)
            f2 = feat(normalizer(b))[1,:]
            
            # tack a 1 onto the top of each input point all at once
            c = np.vstack((a,b))
            o = np.ones((1,np.shape(c)[1]))
            c = np.vstack((o,c))
            r = (np.dot(c.T,w))
            
            # various
            a.shape = (np.size(s),np.size(s))
            b.shape = (np.size(s),np.size(s))
            r.shape = (np.size(s),np.size(s))
            ax2.plot_surface(a,b,r,alpha = 0.1,color = 'lime',rstride=15, cstride=15,linewidth=0.5,edgecolor = 'k')
            ax2.set_xlim([np.min(a),np.max(a)])
            ax2.set_ylim([np.min(b),np.max(b)])
            
            '''
            a,b = np.meshgrid(t1,t2)
            a.shape = (1,np.size(s)**2)
            b.shape = (1,np.size(s)**2)
            '''
 
            '''
            c = np.vstack((a,b))
            o = np.ones((1,np.shape(c)[1]))
            c = np.vstack((o,c))

            # tack a 1 onto the top of each input point all at once
            r = (np.dot(c.T,w))

            a.shape = (np.size(s),np.size(s))
            b.shape = (np.size(s),np.size(s))
            r.shape = (np.size(s),np.size(s))
            ax2.plot_surface(a,b,r,alpha = 0.1,color = 'lime',rstride=15, cstride=15,linewidth=0.5,edgecolor = 'k')
            '''
            
            # label axes
            #self.move_axis_left(ax2)
            ax2.set_xlabel(r'$f_1(x)$', fontsize = 12,labelpad = 5)
            ax2.set_ylabel(r'$f_2(x)$', rotation = 0,fontsize = 12,labelpad = 5)
            ax2.set_zlabel(r'$y$', rotation = 0,fontsize = 12,labelpad = 0)
            self.move_axis_left(ax2)
            ax2.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))
            ax2.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
            ax2.view_init(view[0],view[1])
Example #9
    def plot_cost_histories(self, histories, start, **kwargs):
        # plotting colors
        colors = ['k', 'magenta', 'aqua', 'blueviolet', 'chocolate']

        # initialize figure
        fig = plt.figure(figsize=(10, 3))

        # create subplot with 1 panel
        gs = gridspec.GridSpec(1, 1)
        ax = plt.subplot(gs[0])

        # any labels to add?
        labels = [' ', ' ']
        if 'labels' in kwargs:
            labels = kwargs['labels']

        # plot points on cost function plot too?
        points = False
        if 'points' in kwargs:
            points = kwargs['points']

        # run through input histories, plotting each beginning at 'start' iteration
        for c in range(len(histories)):
            history = histories[c]
            label = 0
            if c == 0:
                label = labels[0]
            else:
                label = labels[1]

            # check if a label exists, if so add it to the plot
            if np.size(label) == 0:
                ax.plot(np.arange(start, len(history), 1),
                        history[start:],
                        linewidth=3 * (0.8)**(c),
                        color=colors[c])
            else:
                ax.plot(np.arange(start, len(history), 1),
                        history[start:],
                        linewidth=3 * (0.8)**(c),
                        color=colors[c],
                        label=label)

            # check if points should be plotted for visualization purposes
            if points == True:
                ax.scatter(np.arange(start, len(history), 1),
                           history[start:],
                           s=90,
                           color=colors[c],
                           edgecolor='w',
                           linewidth=2,
                           zorder=3)

        # clean up panel
        xlabel = 'step $k$'
        if 'xlabel' in kwargs:
            xlabel = kwargs['xlabel']
        ylabel = r'$g\left(\mathbf{w}^k\right)$'
        if 'ylabel' in kwargs:
            ylabel = kwargs['ylabel']
        ax.set_xlabel(xlabel, fontsize=14)
        ax.set_ylabel(ylabel, fontsize=14, rotation=0, labelpad=25)
        if np.size(label) > 0:
            anchor = (1, 1)
            if 'anchor' in kwargs:
                anchor = kwargs['anchor']
            plt.legend(loc='upper right', bbox_to_anchor=anchor)
            #leg = ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1), borderaxespad=0)

        ax.set_xlim([start - 0.5, len(history) - 0.5])

        # fig.tight_layout()
        plt.show()
Example #10
def softmax(w,x,y,beta):
    # compute cost over batch        
    cost = np.sum(beta*np.log(1 + np.exp(-y*model(x,w))))
    return cost/float(np.size(y))
Example #11
def d_ll(x, num_peds, ess, robot_mu_x, robot_mu_y, ped_mu_x, ped_mu_y, \
         cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
         cov_ped_x, cov_ped_y, inv_cov_ped_x, inv_cov_ped_y, \
         one_over_cov_sum_x, one_over_cov_sum_y, normalize):
    T = np.size(robot_mu_x)

    # np.int was removed in NumPy 1.24; use the Python builtin int instead
    d_alpha = [0. for _ in range(2 * T * int(np.round(ess + 1)))]
    d_beta = [0. for _ in range(2 * T * int(np.round(ess + 1)))]
    d_llambda = np.asarray(
        [0. for _ in range(2 * T * int(np.round(ess + 1)))])

    n = 2
    for ped in range(ess):
        # if normalize == True:
        #   # normalize_x = np.multiply(np.power(2*np.pi,-0.5), \
        # one_over_std_sum_x[ped])
        #   # normalize_y = np.multiply(np.power(2*np.pi,-0.5), \
        # one_over_std_sum_y[ped])
        # else:
        normalize_x = 1.
        normalize_y = 1.

        vel_robot_x = np.tile(x[:T],
                              (T, 1)).T - np.tile(x[n * T:(n + 1) * T], (T, 1))
        vel_robot_y = np.tile(x[T:2 * T],
                              (T, 1)).T - np.tile(x[(n + 1) * T:(n + 2) * T],
                                                  (T, 1))
        n = n + 2

        vel_robot_x_2 = np.power(vel_robot_x, 2)
        vel_robot_y_2 = np.power(vel_robot_y, 2)

        quad_robot_x = np.multiply(one_over_cov_sum_x[ped], vel_robot_x_2)
        quad_robot_y = np.multiply(one_over_cov_sum_y[ped], vel_robot_y_2)

        Z_x = np.multiply(normalize_x, np.exp(-0.5 * quad_robot_x))
        Z_y = np.multiply(normalize_y, np.exp(-0.5 * quad_robot_y))

        Z = np.multiply(Z_x, Z_y)
        X = np.divide(Z, 1. - Z)

        alpha_x = np.multiply(
            X, np.multiply(vel_robot_x, one_over_cov_sum_x[ped]))
        alpha_y = np.multiply(
            X, np.multiply(vel_robot_y, one_over_cov_sum_y[ped]))
        #        X and Y COMPONENT OF R DERIVATIVE
        d_alpha[:T] = np.add(d_alpha[:T], np.sum(alpha_x, axis=1))
        d_alpha[T:2 * T] = np.add(d_alpha[T:2 * T], np.sum(alpha_y, axis=1))

    d_beta[:T] = -np.dot(x[:T] - robot_mu_x, inv_cov_robot_x)
    d_beta[T:2 * T] = -np.dot(x[T:2 * T] - robot_mu_y, inv_cov_robot_y)

    d_llambda[0:2 * T] = np.add(d_alpha[0:2 * T], d_beta[0:2 * T])
    #        X AND Y COMPONENT OF PED DERIVATIVE
    n = 2
    for ped in range(ess):
        # if normalize == True:
        #   # normalize_x = np.multiply(np.power(2*np.pi,-0.5), \
        # one_over_std_sum_x[ped])
        #   # normalize_y = np.multiply(np.power(2*np.pi,-0.5), \
        # one_over_std_sum_y[ped])
        # else:
        normalize_x = 1.
        normalize_y = 1.

        vel_ped_x = np.tile(x[:T], (T, 1)) - np.tile(x[n * T:(n + 1) * T],
                                                     (T, 1)).T
        vel_ped_y = np.tile(x[T:2 * T],
                            (T, 1)) - np.tile(x[(n + 1) * T:(n + 2) * T],
                                              (T, 1)).T
        vel_ped_x_2 = np.power(vel_ped_x, 2)
        vel_ped_y_2 = np.power(vel_ped_y, 2)

        quad_ped_x = np.multiply(one_over_cov_sum_x[ped], vel_ped_x_2)
        quad_ped_y = np.multiply(one_over_cov_sum_y[ped], vel_ped_y_2)

        Z_x = np.multiply(normalize_x, np.exp(-0.5 * quad_ped_x))
        Z_y = np.multiply(normalize_y, np.exp(-0.5 * quad_ped_y))

        Z = np.multiply(Z_x, Z_y)
        X = np.divide(Z, 1. - Z)

        alpha_x = np.multiply(X, np.multiply(vel_ped_x,
                                             one_over_cov_sum_x[ped]))
        alpha_y = np.multiply(X, np.multiply(vel_ped_y,
                                             one_over_cov_sum_y[ped]))

        d_alpha[n * T:(n + 1) * T] = -np.sum(alpha_x, axis=1)
        d_alpha[(n + 1) * T:(n + 2) * T] = -np.sum(alpha_y, axis=1)

        d_beta[n*T:(n+1)*T] = -np.dot(x[n*T:(n+1)*T]-ped_mu_x[ped], \
                                                                 inv_cov_ped_x[ped])
        d_beta[(n+1)*T:(n+2)*T] = -np.dot(x[(n+1)*T:(n+2)*T]-ped_mu_y[ped], \
                                                                 inv_cov_ped_y[ped])
        n = n + 2

    d_llambda[2 * T:] = np.add(d_alpha[2 * T:], d_beta[2 * T:])
    return -1. * d_llambda
Example #12
def dd_ll(x, num_peds, ess, robot_mu_x, robot_mu_y, ped_mu_x, ped_mu_y, \
          cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
          cov_ped_x, cov_ped_y, inv_cov_ped_x, inv_cov_ped_y, \
          one_over_cov_sum_x, one_over_cov_sum_y, normalize):
    T = np.size(robot_mu_x)

    # np.int was removed in NumPy 1.24; use the Python builtin int instead
    H = np.zeros((2 * T * int(ess + 1), 2 * T * int(ess + 1)), float)
    sum_d_alpha = [0. for _ in range(2 * T * int(np.round(ess + 1)))]

    n = 2
    for ped in range(ess):
        # if normalize == True:
        #   # normalize_x = np.multiply(np.power(2*np.pi,-0.5), \
        # one_over_std_sum_x[ped])
        #   # normalize_y = np.multiply(np.power(2*np.pi,-0.5), \
        # one_over_std_sum_y[ped])
        # else:
        normalize_x = 1.
        normalize_y = 1.

        vel_robot_x = np.tile(x[:T],
                              (T, 1)).T - np.tile(x[n * T:(n + 1) * T], (T, 1))
        vel_robot_y = np.tile(x[T:2 * T],
                              (T, 1)).T - np.tile(x[(n + 1) * T:(n + 2) * T],
                                                  (T, 1))
        vel_robot_x_2 = np.power(vel_robot_x, 2)
        vel_robot_y_2 = np.power(vel_robot_y, 2)
        vel_robot_x_y = np.multiply(vel_robot_x, vel_robot_y)

        one_over_cov_x_y = np.multiply(one_over_cov_sum_x[ped], \
                                                            one_over_cov_sum_y[ped])
        quad_robot_x = np.multiply(one_over_cov_sum_x[ped], vel_robot_x_2)
        quad_robot_y = np.multiply(one_over_cov_sum_y[ped], vel_robot_y_2)

        Z_x = np.multiply(normalize_x, np.exp(-0.5 * quad_robot_x))
        Z_y = np.multiply(normalize_y, np.exp(-0.5 * quad_robot_y))

        Z = np.multiply(Z_x, Z_y)
        X = np.divide(Z, 1. - Z)
        X_2 = np.power(X, 2)

        X_plus_X2 = np.add(X, X_2)

        d_alpha_x = np.multiply(X, one_over_cov_sum_x[ped])
        d_alpha_x = np.add(d_alpha_x, -np.multiply(X_plus_X2, np.power(\
                             np.multiply(vel_robot_x, one_over_cov_sum_x[ped]), 2)))
        d_alpha_y = np.multiply(X, one_over_cov_sum_y[ped])
        d_alpha_y = np.add(d_alpha_y, -np.multiply(X_plus_X2, np.power(\
                             np.multiply(vel_robot_y, one_over_cov_sum_y[ped]), 2)))

        sum_d_alpha[:T] = np.add(sum_d_alpha[:T], np.sum(d_alpha_x, axis=1))
        sum_d_alpha[T:2 * T] = np.add(sum_d_alpha[T:2 * T],
                                      np.sum(d_alpha_y, axis=1))

        d_off_alpha = -np.multiply(X_plus_X2, np.multiply(vel_robot_x_y, \
                                                                  one_over_cov_x_y))
        #   OFF DIAGONALS
        H[:T, T:2 * T] = np.add(H[:T, T:2 * T],
                                np.diag(np.sum(d_off_alpha, axis=1)))

        H[:T, n * T:(n + 1) * T] = -1. * d_alpha_x
        H[n * T:(n + 1) * T, :T] = H[:T, n * T:(n + 1) * T].T

        H[T:2 * T, (n + 1) * T:(n + 2) * T] = -1. * d_alpha_y
        H[(n + 1) * T:(n + 2) * T, T:2 * T] = H[T:2 * T,
                                                (n + 1) * T:(n + 2) * T].T

        H[T:2*T,n*T:(n+1)*T] = np.multiply(X_plus_X2, np.multiply(vel_robot_x_y, \
                                                                  one_over_cov_x_y))
        H[n * T:(n + 1) * T, T:2 * T] = H[T:2 * T, n * T:(n + 1) * T].T

        H[:T,(n+1)*T:(n+2)*T] = np.multiply(X_plus_X2, np.multiply(vel_robot_x_y, \
                                                                  one_over_cov_x_y))
        H[(n + 1) * T:(n + 2) * T, :T] = H[:T, (n + 1) * T:(n + 2) * T].T

        n = n + 2

    H[:T, :T] = np.add(np.diag(sum_d_alpha[:T]), -1. * inv_cov_robot_x)
    H[T:2 * T, T:2 * T] = np.add(np.diag(sum_d_alpha[T:2 * T]),
                                 -1. * inv_cov_robot_y)

    H[T:2 * T, :T] = H[:T, T:2 * T].T
    #      PED DIAGONALS
    n = 2
    for ped in range(ess):
        # if normalize == True:
        #   # normalize_x = np.multiply(np.power(2*np.pi,-0.5), \
        # one_over_std_sum_x[ped])
        #   # normalize_y = np.multiply(np.power(2*np.pi,-0.5), \
        # one_over_std_sum_y[ped])
        # else:
        normalize_x = 1.
        normalize_y = 1.

        vel_ped_x = np.tile(x[:T], (T, 1)) - np.tile(x[n * T:(n + 1) * T],
                                                     (T, 1)).T
        vel_ped_y = np.tile(x[T:2 * T],
                            (T, 1)) - np.tile(x[(n + 1) * T:(n + 2) * T],
                                              (T, 1)).T
        vel_ped_x_2 = np.power(vel_ped_x, 2)
        vel_ped_y_2 = np.power(vel_ped_y, 2)
        vel_ped_x_y = np.multiply(vel_ped_x, vel_ped_y)

        one_over_cov_x_y = np.multiply(one_over_cov_sum_x[ped], \
                                                            one_over_cov_sum_y[ped])
        quad_ped_x = np.multiply(one_over_cov_sum_x[ped], vel_ped_x_2)
        quad_ped_y = np.multiply(one_over_cov_sum_y[ped], vel_ped_y_2)

        Z_x = np.multiply(normalize_x, np.exp(-0.5 * quad_ped_x))
        Z_y = np.multiply(normalize_y, np.exp(-0.5 * quad_ped_y))

        Z = np.multiply(Z_x, Z_y)
        X = np.divide(Z, 1. - Z)
        X_2 = np.power(X, 2)

        X_plus_X2 = np.add(X, X_2)

        d_alpha_x = np.multiply(X, one_over_cov_sum_x[ped])
        d_alpha_x = np.add(d_alpha_x, -np.multiply(X_plus_X2, np.power(\
                               np.multiply(vel_ped_x, one_over_cov_sum_x[ped]), 2)))
        d_alpha_y = np.multiply(X, one_over_cov_sum_y[ped])
        d_alpha_y = np.add(d_alpha_y, -np.multiply(X_plus_X2, np.power(\
                               np.multiply(vel_ped_y, one_over_cov_sum_y[ped]), 2)))

        H[n*T:(n+1)*T,n*T:(n+1)*T] = np.diag(np.sum(d_alpha_x, axis=1)) - \
                                                                  inv_cov_ped_x[ped]
        H[(n+1)*T:(n+2)*T,(n+1)*T:(n+2)*T] = np.diag(np.sum(d_alpha_y, axis=1)) - \
                                                                  inv_cov_ped_y[ped]
        H[n*T:(n+1)*T,(n+1)*T:(n+2)*T] = -np.diag(np.sum(np.multiply(X_plus_X2, \
                               np.multiply(vel_ped_x_y, one_over_cov_x_y)), axis=1))

        H[(n + 1) * T:(n + 2) * T,
          n * T:(n + 1) * T] = H[n * T:(n + 1) * T, (n + 1) * T:(n + 2) * T].T

        n = n + 2
    return -1. * H
Example #13
    def plot_three_fits(self, run1, run2, run3, **kwargs):
        ## strip off model, normalizer, etc., ##
        model1 = run1.model
        model2 = run2.model
        model3 = run3.model

        normalizer1 = run1.normalizer
        normalizer2 = run2.normalizer
        normalizer3 = run3.normalizer

        # get weights
        cost_history1 = run1.cost_histories[0]
        ind1 = np.argmin(cost_history1)
        w1 = run1.weight_histories[0][ind1]
        cost_history2 = run2.cost_histories[0]
        ind2 = np.argmin(cost_history2)
        w2 = run2.weight_histories[0][ind2]
        cost_history3 = run3.cost_histories[0]
        ind3 = np.argmin(cost_history3)
        w3 = run3.weight_histories[0][ind3]

        # construct figure
        fig, axs = plt.subplots(1, 3, figsize=(10, 4))

        # create subplot with 2 panels
        gs = gridspec.GridSpec(1, 3)
        ax1 = plt.subplot(gs[0], aspect='equal')
        ax2 = plt.subplot(gs[1], aspect='equal')
        ax3 = plt.subplot(gs[2], aspect='equal')

        # loop over axes
        for ax in [ax1, ax2, ax3]:
            ### from above
            ax.set_xlabel(r'$x_1$', fontsize=15)
            ax.set_ylabel(r'$x_2$', fontsize=15, rotation=0, labelpad=20)
            ax.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))
            ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))

            # plot points in 2d
            ind0 = np.argwhere(self.y == +1)
            ax.scatter(self.x[ind0, 0],
                       self.x[ind0, 1],
                       s=55,
                       color=self.colors[0],
                       edgecolor='k')

            ind1 = np.argwhere(self.y == -1)
            ax.scatter(self.x[ind1, 0],
                       self.x[ind1, 1],
                       s=55,
                       color=self.colors[1],
                       edgecolor='k')

            ### create surface and boundary plot ###
            xmin1 = np.min(self.x[:, 0])
            xmax1 = np.max(self.x[:, 0])
            xgap1 = (xmax1 - xmin1) * 0.05
            xmin1 -= xgap1
            xmax1 += xgap1

            xmin2 = np.min(self.x[:, 1])
            xmax2 = np.max(self.x[:, 1])
            xgap2 = (xmax2 - xmin2) * 0.05
            xmin2 -= xgap2
            xmax2 += xgap2

            # plot boundary for 2d plot
            r1 = np.linspace(xmin1, xmax1, 300)
            r2 = np.linspace(xmin2, xmax2, 300)
            s, t = np.meshgrid(r1, r2)
            s = np.reshape(s, (np.size(s), 1))
            t = np.reshape(t, (np.size(t), 1))
            h = np.concatenate((s, t), axis=1)

            # plot model
            z = 0
            if ax == ax1:
                z = model1(normalizer1(h.T), w1)
                ax.set_title('underfitting', fontsize=14)
            if ax == ax2:
                z = model2(normalizer2(h.T), w2)
                ax.set_title('overfitting', fontsize=14)
            if ax == ax3:
                z = model3(normalizer3(h.T), w3)
                ax.set_title(r'"good"', fontsize=14)
            z = np.sign(z)

            # reshape it
            s.shape = (np.size(r1), np.size(r2))
            t.shape = (np.size(r1), np.size(r2))
            z.shape = (np.size(r1), np.size(r2))

            #### plot contour, color regions ####
            ax.contour(s,
                       t,
                       z,
                       colors='k',
                       linewidths=2.5,
                       levels=[0],
                       zorder=2)
            ax.contourf(s,
                        t,
                        z,
                        colors=[self.colors[1], self.colors[0]],
                        alpha=0.15,
                        levels=range(-1, 2))
Example #14
    gen_subspace_dim, dsc_subspace_dim = 100, 1000
    gen_subs_weights, dsc_subs_weights = np.zeros(gen_subspace_dim), np.zeros(
        dsc_subspace_dim)
    seed = npr.RandomState(0)

    # Training parameters
    param_scale = 0.1
    batch_size = 77
    num_epochs = 5000
    step_size_max = 0.001
    step_size_min = 0.001

    # Initialize gen & dsc params
    gen_layer_sizes = [latent_dim, 20, 20, data_dim]
    init_gen_params = init_random_params(param_scale, gen_layer_sizes)
    num_gen_params = gen_subspace_dim if subspace_training else np.size(
        flatten(init_gen_params)[0])
    print("num gen params: " + str(num_gen_params))
    if show_gen_params:
        dsc_input_size = data_dim + num_gen_params
    else:
        dsc_input_size = data_dim
    dsc_layer_sizes = [dsc_input_size, 30, 20, latent_dim]
    init_dsc_params = init_random_params(param_scale, dsc_layer_sizes)

    # Draw random subspace matrices
    gen_subs_project = sample_subs_projections(gen_layer_sizes,
                                               gen_subspace_dim,
                                               subspace_training,
                                               rs=seed)
    dsc_subs_project = sample_subs_projections(dsc_layer_sizes,
                                               dsc_subspace_dim,
Example #15
def fit_weights_and_save(
        weights_file,
        ca_data_file='rs_vm_denoise_200605.npy',
        vip_silencing_data_file='vip_halo_data_for_sim.npy',
        vip_activation_data_file='vip_chrimson_data_for_sim.npy',
        sst_silencing_data_file='sst_halo_data_for_sim.npy',
        constrain_wts=None,
        allow_var=True,
        fit_s02=True,
        constrain_isn=True,
        l2_penalty=0.01,
        init_noise=0.1,
        init_W_from_lsq=False,
        scale_init_by=1,
        init_W_from_file=False,
        init_file=None):

    nsize, ncontrast = 6, 6

    # In[3]:

    npfile = np.load(ca_data_file, allow_pickle=True)[(
    )]  #,{'rs':rs,'rs_denoise':rs_denoise},allow_pickle=True)
    rs = npfile['rs']
    rs_denoise = npfile['rs_denoise']

    # In[4]:

    nsize, ncontrast, ndir = 6, 6, 8
    ori_dirs = [[0, 4], [2, 6]]  #[[0,4],[1,3,5,7],[2,6]]
    nT = len(ori_dirs)
    nS = len(rs_denoise[0])

    def sum_to_1(r):
        R = r.reshape((r.shape[0], -1))
        #R = R/np.nansum(R[:,~np.isnan(R.sum(0))],axis=1)[:,np.newaxis]
        R = R / np.nansum(R, axis=1)[:, np.newaxis]  # changed 8/28
        return R

    def norm_to_mean(r):
        R = r.reshape((r.shape[0], -1))
        R = R / np.nanmean(R[:, ~np.isnan(R.sum(0))], axis=1)[:, np.newaxis]
        return R

    Rs = [[None, None] for i in range(len(rs))]
    Rso = [[[None for iT in range(nT)] for iS in range(nS)]
           for icelltype in range(len(rs))]
    rso = [[[None for iT in range(nT)] for iS in range(nS)]
           for icelltype in range(len(rs))]

    for iR, r in enumerate(rs):  #rs_denoise):
        print(iR)
        for ialign in range(nS):
            Rs[iR][ialign] = sum_to_1(r[ialign][:, :nsize, :])
    #         Rs[iR][ialign] = von_mises_denoise(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir)))

    kernel = np.ones((1, 2, 2))
    kernel = kernel / kernel.sum()

    for iR, r in enumerate(rs):
        for ialign in range(nS):
            for iori in range(nT):
                Rso[iR][ialign][iori] = np.nanmean(
                    Rs[iR][ialign].reshape(
                        (-1, nsize, ncontrast, ndir))[:, :, :, ori_dirs[iori]],
                    -1)
                Rso[iR][ialign][iori][:, :, 0] = np.nanmean(
                    Rso[iR][ialign][iori][:, :, 0], 1)[:, np.newaxis]
                Rso[iR][ialign][iori][:, 1:, 1:] = ssi.convolve(
                    Rso[iR][ialign][iori], kernel, 'valid')
                Rso[iR][ialign][iori] = Rso[iR][ialign][iori].reshape(
                    Rso[iR][ialign][iori].shape[0], -1)

    #kernel = np.ones((1,2,2))
    #kernel = kernel/kernel.sum()
    #
    #for iR,r in enumerate(rs):
    #    for ialign in range(nS):
    #        for iori in range(nT):
    #            Rso[iR][ialign][iori] = np.nanmean(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir))[:,:,:,ori_dirs[iori]],-1)
    #            Rso[iR][ialign][iori] = ssi.convolve(Rso[iR][ialign][iori],kernel,'same')
    #            Rso[iR][ialign][iori] = Rso[iR][ialign][iori].reshape(Rso[iR][ialign][iori].shape[0],-1)

    # In[6]:

    def set_bound(bd, code, val=0):
        # set bounds to 0 where 0s occur in 'code'
        for iitem in range(len(bd)):
            bd[iitem][code[iitem]] = val

    # In[7]:

    nN = 36
    nS = 2
    nP = 2
    nT = 2
    nQ = 4

    # code for bounds: 0 , constrained to 0
    # +/-1 , constrained to +/-1
    # 1.5, constrained to [0,1]
    # 2 , constrained to [0,inf)
    # -2 , constrained to (-inf,0]
    # 3 , unconstrained

    Wmx_bounds = 3 * np.ones((nP, nQ), dtype=int)
    Wmx_bounds[0, 1] = 0  # SSTs don't receive L4 input

    if allow_var:
        Wsx_bounds = 3 * np.ones(
            Wmx_bounds.shape)  #Wmx_bounds.copy()*0 #np.zeros_like(Wmx_bounds)
        Wsx_bounds[0, 1] = 0
    else:
        Wsx_bounds = np.zeros(
            Wmx_bounds.shape)  #Wmx_bounds.copy()*0 #np.zeros_like(Wmx_bounds)

    Wmy_bounds = 3 * np.ones((nQ, nQ), dtype=int)
    Wmy_bounds[0, :] = 2  # PCs are excitatory
    Wmy_bounds[1:, :] = -2  # all the cell types except PCs are inhibitory
    Wmy_bounds[1, 1] = 0  # SSTs don't inhibit themselves
    # Wmy_bounds[3,1] = 0 # PVs are allowed to inhibit SSTs, consistent with Hillel's unpublished results, but not consistent with Pfeffer et al.
    Wmy_bounds[
        2,
        0] = 0  # VIPs don't inhibit L2/3 PCs. According to Pfeffer et al., only L5 PCs were found to get VIP inhibition

    if allow_var:
        Wsy_bounds = 3 * np.ones(
            Wmy_bounds.shape)  #Wmy_bounds.copy()*0 #np.zeros_like(Wmy_bounds)
        Wsy_bounds[1, 1] = 0
        Wsy_bounds[3, 1] = 0
        Wsy_bounds[2, 0] = 0
    else:
        Wsy_bounds = np.zeros(
            Wmy_bounds.shape)  #Wmy_bounds.copy()*0 #np.zeros_like(Wmy_bounds)

    if constrain_wts is not None:
        for wt in constrain_wts:
            Wmy_bounds[wt[0], wt[1]] = 0
            Wsy_bounds[wt[0], wt[1]] = 0

    def tile_nS_nT_nN(kernel):
        row = np.concatenate([kernel for idim in range(nS * nT)],
                             axis=0)[np.newaxis, :]
        tiled = np.concatenate([row for irow in range(nN)], axis=0)
        return tiled

    if fit_s02:
        s02_bounds = 2 * np.ones(
            (nQ, ))  # permitting noise as a free parameter
    else:
        s02_bounds = np.ones((nQ, ))

    k_bounds = 1.5 * np.ones((nQ, ))

    kappa_bounds = np.ones((1, ))
    # kappa_bounds = 2*np.ones((1,))

    T_bounds = 1.5 * np.ones((nQ, ))

    X_bounds = tile_nS_nT_nN(np.array([2, 1]))
    # X_bounds = np.array([np.array([2,1,2,1])]*nN)

    Xp_bounds = tile_nS_nT_nN(np.array([3, 1]))
    # Xp_bounds = np.array([np.array([3,1,3,1])]*nN)

    # Y_bounds = tile_nS_nT_nN(2*np.ones((nQ,)))
    # # Y_bounds = 2*np.ones((nN,nT*nS*nQ))

    Eta_bounds = tile_nS_nT_nN(3 * np.ones((nQ, )))
    # Eta_bounds = 3*np.ones((nN,nT*nS*nQ))

    if allow_var:
        Xi_bounds = tile_nS_nT_nN(3 * np.ones((nQ, )))
    else:
        Xi_bounds = tile_nS_nT_nN(np.zeros((nQ, )))

    # Xi_bounds = 3*np.ones((nN,nT*nS*nQ))

    h1_bounds = -2 * np.ones((1, ))

    h2_bounds = 2 * np.ones((1, ))

    h3_bounds = -2 * np.ones((1, ))

    # In[8]:

    # shapes = [(nP,nQ),(nQ,nQ),(nP,nQ),(nQ,nQ),(nQ,),(nQ,),(1,),(nN,nS*nP),(nN,nS*nQ),(nN,nS*nQ),(nN,nS*nQ)]
    shapes = [(nP, nQ), (nQ, nQ), (nP, nQ), (nQ, nQ), (nQ, ), (nQ, ), (1, ),
              (nQ, ), (nN, nT * nS * nP), (nN, nT * nS * nP),
              (nN, nT * nS * nQ), (nN, nT * nS * nQ), (1, ), (1, ), (1, ),
              (nN, nT * nS * nQ), (nN, nT * nS * nQ), (nN, nT * nS * nQ)]
    print('size of shapes: ' + str(np.sum([np.prod(shp) for shp in shapes])))
    #         Wmx,    Wmy,    Wsx,    Wsy,    s02,  k,    kappa,T,   XX,            XXp,          Eta,          Xi, h1, h2, Eta1,   Eta2

    lb = [-np.inf * np.ones(shp) for shp in shapes]
    ub = [np.inf * np.ones(shp) for shp in shapes]
    bdlist = [
        Wmx_bounds, Wmy_bounds, Wsx_bounds, Wsy_bounds, s02_bounds, k_bounds,
        kappa_bounds, T_bounds, X_bounds, Xp_bounds, Eta_bounds, Xi_bounds,
        h1_bounds, h2_bounds, h3_bounds, Eta_bounds, Eta_bounds, Eta_bounds
    ]

    set_bound(lb, [bd == 0 for bd in bdlist], val=0)
    set_bound(ub, [bd == 0 for bd in bdlist], val=0)

    set_bound(lb, [bd == 2 for bd in bdlist], val=0)

    set_bound(ub, [bd == -2 for bd in bdlist], val=0)

    set_bound(lb, [bd == 1 for bd in bdlist], val=1)
    set_bound(ub, [bd == 1 for bd in bdlist], val=1)

    set_bound(lb, [bd == 1.5 for bd in bdlist], val=0)
    set_bound(ub, [bd == 1.5 for bd in bdlist], val=1)

    set_bound(lb, [bd == -1 for bd in bdlist], val=-1)
    set_bound(ub, [bd == -1 for bd in bdlist], val=-1)

    # for bd in [lb,ub]:
    #     for ind in [2,3]:
    #         bd[ind][:,1] = 0

    # temporary for no variation expt.
    # lb[2] = np.zeros_like(lb[2])
    # lb[3] = np.zeros_like(lb[3])
    # lb[4] = np.ones_like(lb[4])
    # lb[5] = np.zeros_like(lb[5])
    # ub[2] = np.zeros_like(ub[2])
    # ub[3] = np.zeros_like(ub[3])
    # ub[4] = np.ones_like(ub[4])
    # ub[5] = np.ones_like(ub[5])
    # temporary for no variation expt.
    lb = np.concatenate([a.flatten() for a in lb])
    ub = np.concatenate([b.flatten() for b in ub])
    bounds = [(a, b) for a, b in zip(lb, ub)]

    # In[10]:

    nS = 2
    ndims = 5
    ncelltypes = 5
    Yhat = [[None for iT in range(nT)] for iS in range(nS)]
    Xhat = [[None for iT in range(nT)] for iS in range(nS)]
    Ypc_list = [[None for iT in range(nT)] for iS in range(nS)]
    Xpc_list = [[None for iT in range(nT)] for iS in range(nS)]
    mx = [None for iS in range(nS)]
    for iS in range(nS):
        mx[iS] = np.zeros((ncelltypes, ))
        yy = [None for icelltype in range(ncelltypes)]
        for icelltype in range(ncelltypes):
            yy[icelltype] = np.nanmean(Rso[icelltype][iS][0], 0)
            mx[iS][icelltype] = np.nanmax(yy[icelltype])
        for iT in range(nT):
            y = [
                np.nanmean(Rso[icelltype][iS][iT], axis=0)[:, np.newaxis] /
                mx[iS][icelltype] for icelltype in range(1, ncelltypes)
            ]
            Ypc_list[iS][iT] = [None for icelltype in range(1, ncelltypes)]
            for icelltype in range(1, ncelltypes):
                rss = Rso[icelltype][iS][iT].copy(
                )  #/mx[iS][icelltype] #.reshape(Rs[icelltype][ialign].shape[0],-1)
                #rss = Rso[icelltype][iS][iT].copy() #.reshape(Rs[icelltype][ialign].shape[0],-1)
                rss = rss[np.isnan(rss).sum(1) == 0]
                #         print(rss.max())
                #         rss[rss<0] = 0
                #         rss = rss[np.random.randn(rss.shape[0])>0]
                try:
                    u, s, v = np.linalg.svd(rss - np.mean(rss, 0)[np.newaxis])
                    Ypc_list[iS][iT][icelltype - 1] = [
                        (s[idim], v[idim]) for idim in range(ndims)
                    ]
    #                 print('yep on Y')
    #                 print(np.min(np.sum(rs[icelltype][iS][iT],axis=1)))
                except:
                    #                 print('nope on Y')
                    print(np.mean(np.isnan(rss)))
                    print(np.min(np.sum(rs[icelltype][iS][iT], axis=1)))
            Yhat[iS][iT] = np.concatenate(y, axis=1)
            #         x = sim_utils.columnize(Rso[0][iS][iT])[:,np.newaxis]
            icelltype = 0
            #x = np.nanmean(Rso[icelltype][iS][iT],0)[:,np.newaxis]#/mx[iS][icelltype]
            x = np.nanmean(Rso[icelltype][iS][iT],
                           0)[:, np.newaxis] / mx[iS][icelltype]
            #         opto_column = np.concatenate((np.zeros((nN,)),np.zeros((nNO/2,)),np.ones((nNO/2,))),axis=0)[:,np.newaxis]
            Xhat[iS][iT] = np.concatenate((x, np.ones_like(x)), axis=1)
            #         Xhat[iS][iT] = np.concatenate((x,np.ones_like(x),opto_column),axis=1)
            icelltype = 0
            #rss = Rso[icelltype][iS][iT].copy()/mx[iS][icelltype]
            rss = Rso[icelltype][iS][iT].copy()
            rss = rss[np.isnan(rss).sum(1) == 0]
            #         try:
            u, s, v = np.linalg.svd(rss - rss.mean(0)[np.newaxis])
            Xpc_list[iS][iT] = [None for iinput in range(2)]
            Xpc_list[iS][iT][0] = [(s[idim], v[idim]) for idim in range(ndims)]
            Xpc_list[iS][iT][1] = [(0, np.zeros((Xhat[0][0].shape[0], )))
                                   for idim in range(ndims)]
    #         except:
    #             print('nope on X')
    #             print(np.mean(np.isnan(rss)))
    #             print(np.min(np.sum(Rso[icelltype][iS][iT],axis=1)))
    nN, nP = Xhat[0][0].shape
    nQ = Yhat[0][0].shape[1]

    # In[11]:

    def compute_f_(Eta, Xi, s02):
        return sim_utils.f_miller_troyer(
            Eta, Xi**2 + np.concatenate([s02 for ipixel in range(nS * nT)]))

    def compute_fprime_m_(Eta, Xi, s02):
        return sim_utils.fprime_miller_troyer(
            Eta, Xi**2 + np.concatenate([s02
                                         for ipixel in range(nS * nT)])) * Xi

    def compute_fprime_s_(Eta, Xi, s02):
        s2 = Xi**2 + np.concatenate((s02, s02), axis=0)
        return sim_utils.fprime_s_miller_troyer(Eta, s2) * (Xi / s2)

    def sorted_r_eigs(w):
        drW, prW = np.linalg.eig(w)
        srtinds = np.argsort(drW)
        return drW[srtinds], prW[:, srtinds]

    # In[12]:

    #         0.Wmx,  1.Wmy,  2.Wsx,  3.Wsy,  4.s02,5.K,  6.kappa,7.T,8.XX,        9.XXp,        10.Eta,       11.Xi,   12.h1,  13.h2,  14.Eta1,    15.Eta2

    shapes = [(nP, nQ), (nQ, nQ), (nP, nQ), (nQ, nQ), (nQ, ), (nQ, ), (1, ),
              (nQ, ), (nN, nT * nS * nP), (nN, nT * nS * nP),
              (nN, nT * nS * nQ), (nN, nT * nS * nQ), (1, ), (1, ), (1, ),
              (nN, nT * nS * nQ), (nN, nT * nS * nQ), (nN, nT * nS * nQ)]
    print('size of shapes: ' + str(np.sum([np.prod(shp) for shp in shapes])))

    # In[13]:

    import calnet.fitting_spatial_feature
    import sim_utils

    # In[14]:

    opto_dict = np.load(vip_silencing_data_file, allow_pickle=True)[()]

    Yhat_opto = opto_dict['Yhat_opto']
    for iS in range(nS):
        mx = np.zeros((nQ, ))
        for iQ in range(nQ):
            slicer = slice(nQ * nT * iS + iQ, nQ * nT * (1 + iS), nQ)
            mx[iQ] = np.nanmax(Yhat_opto[0::2][:, slicer])
            Yhat_opto[:, slicer] = Yhat_opto[:, slicer] / mx[iQ]
    #Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    print(Yhat_opto.shape)
    h_opto = opto_dict['h_opto']
    dYY1 = Yhat_opto[1::2] - Yhat_opto[0::2]
    for to_overwrite in [1, 2, 5, 6]:  # overwrite sst and vip with off-centered values
        dYY1[:, to_overwrite] = dYY1[:, to_overwrite + 8]
    for to_overwrite in [11, 15]:
        dYY1[:, to_overwrite] = np.nan  #dYY1[:,to_overwrite-8]

    opto_dict = np.load(vip_activation_data_file, allow_pickle=True)[()]

    Yhat_opto = opto_dict['Yhat_opto']
    for iS in range(nS):
        mx = np.zeros((nQ, ))
        for iQ in range(nQ):
            slicer = slice(nQ * nT * iS + iQ, nQ * nT * (1 + iS), nQ)
            mx[iQ] = np.nanmax(Yhat_opto[0::2][:, slicer])
            Yhat_opto[:, slicer] = Yhat_opto[:, slicer] / mx[iQ]
    #Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    print(Yhat_opto.shape)
    h_opto = opto_dict['h_opto']
    dYY2 = Yhat_opto[1::2] - Yhat_opto[0::2]

    opto_dict = np.load(sst_silencing_data_file, allow_pickle=True)[()]

    Yhat_opto = opto_dict['Yhat_opto']
    for iS in range(nS):
        mx = np.zeros((nQ, ))
        for iQ in range(nQ):
            slicer = slice(nQ * nT * iS + iQ, nQ * nT * (1 + iS), nQ)
            mx[iQ] = np.nanmax(Yhat_opto[0::2][:, slicer])
            Yhat_opto[:, slicer] = Yhat_opto[:, slicer] / mx[iQ]
    #Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    print(Yhat_opto.shape)
    h_opto = opto_dict['h_opto']
    dYY3 = Yhat_opto[1::2] - Yhat_opto[0::2]

    print('dYY1 mean: %.3f' % np.nanmean(np.abs(dYY1)))
    print('dYY2 mean: %.3f' % np.nanmean(np.abs(dYY2)))
    print('dYY3 mean: %.3f' % np.nanmean(np.abs(dYY3)))

    dYY = np.concatenate((dYY1, dYY2, dYY3), axis=0)

    opto_mask = ~np.isnan(dYY)

    dYY[~opto_mask] = 0

    np.save(
        '/Users/dan/Documents/notebooks/mossing-PC/shared_data/calnet_data/dYY.npy',
        dYY)
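
    # The three loading blocks above repeat the same per-cell-type
    # normalization. A hedged refactor sketch of that step (helper name
    # hypothetical): each cell type is scaled in place by the max of its
    # control (even-indexed, non-opto) trials.
    def normalize_by_control_max(Yhat_opto, nS, nT, nQ):
        for iS in range(nS):
            for iQ in range(nQ):
                slicer = slice(nQ * nT * iS + iQ, nQ * nT * (1 + iS), nQ)
                mx = np.nanmax(Yhat_opto[0::2][:, slicer])
                Yhat_opto[:, slicer] = Yhat_opto[:, slicer] / mx
        return Yhat_opto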

    # In[ ]:

    from importlib import reload
    reload(calnet)
    #reload(calnet.fitting_spatial_feature_opto_nonlinear)
    reload(sim_utils)
    # reload(calnet.fitting_spatial_feature)
    # W0list = [np.ones(shp) for shp in shapes]
    wt_dict = {}
    wt_dict['X'] = 1
    wt_dict['Y'] = 5
    wt_dict['Eta'] = 10  # 1 #
    wt_dict['Xi'] = 0.1
    wt_dict['stims'] = np.ones((nN, 1))  #(np.arange(30)/30)[:,np.newaxis]**1 #
    wt_dict['barrier'] = 0.  #30.0 #0.1
    wt_dict['opto'] = 1e-1  #1e1
    wt_dict['isn'] = 3
    wt_dict['dYY'] = 300  #1000
    wt_dict['Eta12'] = 100
    wt_dict['EtaTV'] = 0.3
    wt_dict['coupling'] = 0

    YYhat = calnet.fitting_spatial_feature_opto_nonlinear_tridi.flatten_nested_list_of_2d_arrays(
        Yhat)
    XXhat = calnet.fitting_spatial_feature_opto_nonlinear_tridi.flatten_nested_list_of_2d_arrays(
        Xhat)
    Eta0 = invert_f_mt(YYhat)

    ntries = 1
    nhyper = 1
    dt = 1e-1
    niter = int(np.round(50 / dt))  #int(1e4)
    perturbation_size = 5e-2
    # learning_rate = 1e-4 # 1e-5 #np.linspace(3e-4,1e-3,niter+1) # 1e-5
    #l2_penalty = 0.1
    Wt = [[None for itry in range(ntries)] for ihyper in range(nhyper)]
    loss = np.zeros((nhyper, ntries))
    is_neg = np.array([b[1] for b in bounds]) == 0
    counter = 0
    negatize = [np.zeros(shp, dtype='bool') for shp in shapes]
    for ishp, shp in enumerate(shapes):
        nel = np.prod(shp)
        negatize[ishp][is_neg[counter:counter + nel].reshape(shp)] = True
        counter = counter + nel
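
    # note (added): is_neg marks flattened weights whose upper bound is 0,
    # i.e. weights constrained to be nonpositive; negatize records their
    # positions shape-by-shape so the random initializations below can be
    # sign-flipped into the feasible region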
    for ihyper in range(nhyper):
        for itry in range(ntries):
            print((ihyper, itry))
            W0list = [
                init_noise * (ihyper + 1) * np.random.rand(*shp)
                for shp in shapes
            ]
            print('size of shapes: ' +
                  str(np.sum([np.prod(shp) for shp in shapes])))
            print('size of w0: ' + str(np.sum([np.size(x) for x in W0list])))
            print('len(W0list) : ' + str(len(W0list)))
            counter = 0
            for ishp, shp in enumerate(shapes):
                W0list[ishp][negatize[ishp]] = -W0list[ishp][negatize[ishp]]
            W0list[4] = np.ones(shapes[4])  # s02
            W0list[5] = np.ones(shapes[5])  # K
            W0list[6] = np.ones(shapes[6])  # kappa
            W0list[7] = np.ones(shapes[7])  # T
            W0list[8] = np.concatenate(Xhat, axis=1)  #XX
            W0list[9] = np.zeros_like(W0list[8])  #XXp
            W0list[10] = Eta0.copy()  #np.zeros(shapes[10]) #Eta
            W0list[11] = np.zeros(shapes[11])  #Xi
            W0list[15] = Eta0.copy()  # Eta1
            W0list[16] = Eta0.copy()  # Eta2
            W0list[17] = Eta0.copy()  # Eta3
            #[Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2,h3,Eta1,Eta2,Eta3]
            #         W0list = Wstar_dict['as_list'].copy()
            #         W0list[1][1,0] = -1.5
            #         W0list[1][3,0] = -1.5
            if init_W_from_lsq:
                W0list[0], W0list[1] = initialize_W(Xhat,
                                                    Yhat,
                                                    scale_by=scale_init_by)
                for ivar in range(0, 2):
                    W0list[ivar] = W0list[ivar] + init_noise * np.random.randn(
                        *W0list[ivar].shape)
            if constrain_isn:
                W0list[1][0, 0] = 3
                W0list[1][0, 3] = 5
                W0list[1][3, 0] = -5
                W0list[1][3, 3] = -5

            if init_W_from_file:
                npyfile = np.load(init_file, allow_pickle=True)[()]
                W0list = npyfile['as_list']
                if len(W0list) < len(shapes):
                    W0list = W0list + [
                        np.array(0.7), -np.array(0.7), W0list[10].copy(),
                        W0list[10].copy(), W0list[10].copy()
                    ]  # append h2, h3, Eta1, Eta2, Eta3 when loading an older checkpoint

            # wt_dict['Xi'] = 10
            # wt_dict['Eta'] = 10
            print('size of bounds: ' + str(np.sum([np.size(x)
                                                   for x in bdlist])))
            print('size of w0: ' + str(np.sum([np.size(x) for x in W0list])))
            print('size of shapes: ' +
                  str(np.sum([np.prod(shp) for shp in shapes])))
            Wt[ihyper][itry], loss[ihyper][
                itry], gr, hess, result = calnet.fitting_spatial_feature_opto_nonlinear_tridi.fit_W_sim(
                    Xhat,
                    Xpc_list,
                    Yhat,
                    Ypc_list,
                    pop_rate_fn=sim_utils.f_miller_troyer,
                    pop_deriv_fn=sim_utils.fprime_miller_troyer,
                    neuron_rate_fn=sim_utils.evaluate_f_mt,
                    W0list=W0list.copy(),
                    bounds=bounds,
                    niter=niter,
                    wt_dict=wt_dict,
                    l2_penalty=l2_penalty,
                    compute_hessian=False,
                    dt=dt,
                    perturbation_size=perturbation_size,
                    dYY=dYY,
                    constrain_isn=constrain_isn,
                    opto_mask=opto_mask)
    #         Wt[ihyper][itry] = [w[-1] for w in Wt_temp]
    #         loss[ihyper,itry] = loss_temp[-1]

    # In[285]:
    def parse_W(W):
        Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, XX, XXp, Eta, Xi, h1, h2, h3, Eta1, Eta2, Eta3 = W
        return Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, XX, XXp, Eta, Xi, h1, h2, h3, Eta1, Eta2, Eta3

    itry = 0
    Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, XX, XXp, Eta, Xi, h1, h2, h3, Eta1, Eta2, Eta3 = parse_W(
        Wt[0][0])

    # In[286]:

    labels = [
        'Wmx', 'Wmy', 'Wsx', 'Wsy', 's02', 'K', 'kappa', 'T', 'XX', 'XXp',
        'Eta', 'Xi', 'h1', 'h2', 'h3', 'Eta1', 'Eta2', 'Eta3'
    ]
    Wstar_dict = {}
    for i, label in enumerate(labels):
        Wstar_dict[label] = Wt[0][0][i]
    Wstar_dict['as_list'] = [
        Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, XX, XXp, Eta, Xi, h1, h2, h3,
        Eta1, Eta2, Eta3
    ]
    Wstar_dict['loss'] = loss[0][0]
    Wstar_dict['wt_dict'] = wt_dict
    np.save(weights_file, Wstar_dict, allow_pickle=True)
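
    # Hedged sketch: the saved dictionary can be reloaded the same way the
    # opto files were loaded above:
    #     Wstar = np.load(weights_file, allow_pickle=True)[()]
    #     Wmy = Wstar['Wmy']   # fitted recurrent weights, shape (nQ, nQ)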
    def draw_it(self,w_init,max_its,**kwargs):
        ### input arguments ###        
        self.max_its = max_its
        self.grad = compute_grad(self.g)              # gradient of input function
        self.w_init = w_init
        
        if 'beta' in kwargs:
            self.beta = kwargs['beta']
            
        pts = 'off'
        if 'pts' in kwargs:
            pts = kwargs['pts']
            
        linewidth = 2.5
        if 'linewidth' in kwargs:
            linewidth = kwargs['linewidth']
            
        view = [20,-50]
        if 'view' in kwargs:
            view = kwargs['view']

        axes = False
        if 'axes' in kwargs:
            axes = kwargs['axes']

        plot_final = False
        if 'plot_final' in kwargs:
            plot_final = kwargs['plot_final']

        num_contours = 15
        if 'num_contours' in kwargs:
            num_contours = kwargs['num_contours']
            
        # convert initial point (already stored above) to a float array
        if np.size(self.w_init) == 2:
            self.w_init = np.asarray([float(s) for s in self.w_init])
        else:
            self.w_init = np.asarray([float(self.w_init)])
            
        # construct figure
        fig = plt.figure(figsize=(9,4))

        # create subplot with 2 panels: contour/curve on the left, cost
        # history on the right
        gs = gridspec.GridSpec(1, 2, width_ratios=[2,1])
        ax = plt.subplot(gs[0],aspect = 'equal')
        ax2 = plt.subplot(gs[1])  # ,sharey = ax

        #### run newton's method ####
        self.w_hist = []
        self.run_newtons_method()

        # colors for points
        s = np.linspace(0,1,len(self.w_hist[:round(len(self.w_hist)/2)]))
        s.shape = (len(s),1)
        t = np.ones(len(self.w_hist[round(len(self.w_hist)/2):]))
        t.shape = (len(t),1)
        s = np.vstack((s,t))
        colorspec = []
        colorspec = np.concatenate((s,np.flipud(s)),1)
        colorspec = np.concatenate((colorspec,np.zeros((len(s),1))),1)
    
        #### define input space for function and evaluate ####
        if np.size(self.w_init) == 2:           # function is multi-input, plot 3d function contour
            # set viewing limits on contour plot
            xvals = [self.w_hist[s][0] for s in range(len(self.w_hist))]
            xvals.append(self.w_init[0])
            yvals = [self.w_hist[s][1] for s in range(len(self.w_hist))]
            yvals.append(self.w_init[1])
            xmax = max(xvals)
            xmin = min(xvals)
            xgap = (xmax - xmin)*0.1
            ymax = max(yvals)
            ymin = min(yvals)
            ygap = (ymax - ymin)*0.1
            xmin -= xgap
            xmax += xgap
            ymin -= ygap
            ymax += ygap

            if 'xmin' in kwargs:
                xmin = kwargs['xmin']
            if 'xmax' in kwargs:
                xmax = kwargs['xmax']
            if 'ymin' in kwargs:
                ymin = kwargs['ymin']
            if 'ymax' in kwargs:
                ymax = kwargs['ymax']  

            w1 = np.linspace(xmin,xmax,400)
            w2 = np.linspace(ymin,ymax,400)
            w1_vals, w2_vals = np.meshgrid(w1,w2)
            w1_vals.shape = (len(w1)**2,1)
            w2_vals.shape = (len(w2)**2,1)
            h = np.concatenate((w1_vals,w2_vals),axis=1)
            func_vals = np.asarray([self.g(s) for s in h])
            w1_vals.shape = (len(w1),len(w1))
            w2_vals.shape = (len(w2),len(w2))
            func_vals.shape = (len(w1),len(w2)) 

            ### make contour right plot - as well as horizontal and vertical axes ###
            # set level ridges (num_contours already parsed above, with default)
            levelmin = min(func_vals.flatten())
            levelmax = max(func_vals.flatten())
            cutoff = 0.5
            cutoff = (levelmax - levelmin)*cutoff
            numper = 3
            levels1 = np.linspace(cutoff,levelmax,numper)
            num_contours -= numper

            levels2 = np.linspace(levelmin,cutoff,min(num_contours,numper))
            levels = np.unique(np.append(levels1,levels2))
            num_contours -= numper
            while num_contours > 0:
                cutoff = levels[1]
                levels2 = np.linspace(levelmin,cutoff,min(num_contours,numper))
                levels = np.unique(np.append(levels2,levels))
                num_contours -= numper

            a = ax.contour(w1_vals, w2_vals, func_vals,levels = levels,colors = 'k')
            ax.contourf(w1_vals, w2_vals, func_vals,levels = levels,cmap = 'Blues')
                
            # plot points on contour
            for j in range(len(self.w_hist)):  
                w_val = self.w_hist[j]
                g_val = self.g(w_val)

                # plot in left panel
                if pts == 'on':
                    ax.scatter(w_val[0],w_val[1],s = 30,c = colorspec[j],edgecolor = 'k',linewidth = 1.5*math.sqrt((1/(float(j) + 1))),zorder = 3)

                    ax2.scatter(j,g_val,s = 30,c = colorspec[j],edgecolor = 'k',linewidth = 0.7,zorder = 3)            # plot point of tangency

                # plot connector between points for visualization purposes
                if j > 0:
                    w_old = self.w_hist[j-1]
                    w_new = self.w_hist[j]
                    g_old = self.g(w_old)
                    g_new = self.g(w_new)

                    ax.plot([w_old[0],w_new[0]],[w_old[1],w_new[1]],color = colorspec[j],linewidth = linewidth,alpha = 1,zorder = 2)      # plot approx
                    ax.plot([w_old[0],w_new[0]],[w_old[1],w_new[1]],color = 'k',linewidth = linewidth + 0.4,alpha = 1,zorder = 1)      # plot approx
                    ax2.plot([j-1,j],[g_old,g_new],color = colorspec[j],linewidth = 2,alpha = 1,zorder = 2)      # plot approx
                    ax2.plot([j-1,j],[g_old,g_new],color = 'k',linewidth = 2.5,alpha = 1,zorder = 1)      # plot approx
            
            # clean up panel
            ax.set_xlabel('$w_1$',fontsize = 12)
            ax.set_ylabel('$w_2$',fontsize = 12,rotation = 0,labelpad = 15)
            ax.axhline(y=0, color='k',zorder = 0,linewidth = 0.5)
            ax.axvline(x=0, color='k',zorder = 0,linewidth = 0.5)
            ax.set_xlim([xmin,xmax])
            ax.set_ylim([ymin,ymax])
            
            # set tickmarks
            ax.set_xticks(np.arange(round(xmin), round(xmax) + 1, 1.0))
            ax.set_yticks(np.arange(round(ymin), round(ymax) + 1, 1.0))
            
        else:    # function is single input, plot curve
            xmin = -2
            xmax = 2
            if 'xmin' in kwargs:
                xmin = kwargs['xmin']
            if 'xmax' in kwargs:
                xmax = kwargs['xmax']
                    
            w_plot = np.linspace(xmin,xmax,500)
            g_plot = np.asarray([self.g(s) for s in w_plot])
            ax.plot(w_plot,g_plot,color = 'k',linewidth = 2,zorder = 2)
                
            # set viewing limits
            ymin = min(g_plot)
            ymax = max(g_plot)
            ygap = (ymax - ymin)*0.2
            ymin -= ygap
            ymax += ygap
            ax.set_ylim([ymin,ymax])
                
            # clean up panel
            ax.axhline(y=0, color='k',zorder = 1,linewidth = 0.25)
            ax.axvline(x=0, color='k',zorder = 1,linewidth = 0.25)
            ax.set_xlabel(r'$w$',fontsize = 13)
            ax.set_ylabel(r'$g(w)$',fontsize = 13,rotation = 0,labelpad = 25)   
                
            # function single-input, plot input and evaluation points on function
            for j in range(len(self.w_hist)):  
                w_val = self.w_hist[j]
                g_val = self.g(w_val)
            
                ax.scatter(w_val,g_val,s = 90,c = colorspec[j],edgecolor = 'k',linewidth = 0.5*((1/(float(j) + 1)))**(0.4),zorder = 3,marker = 'X')            # evaluation on function
                ax.scatter(w_val,0,s = 90,facecolor = colorspec[j],edgecolor = 'k',linewidth = 0.5*((1/(float(j) + 1)))**(0.4), zorder = 3)
                    
                ax2.scatter(j,g_val,s = 30,c = colorspec[j],edgecolor = 'k',linewidth = 0.7,zorder = 3)            # plot point of tangency
                    
                # plot connector between points for visualization purposes
                if j > 0:
                    w_old = self.w_hist[j-1][0]
                    w_new = self.w_hist[j][0]
                    g_old = self.g(w_old)
                    g_new = self.g(w_new)
     
                    ax2.plot([j-1,j],[g_old,g_new],color = colorspec[j],linewidth = 2,alpha = 1,zorder = 2)      # plot approx
                    ax2.plot([j-1,j],[g_old,g_new],color = 'k',linewidth = 2.5,alpha = 1,zorder = 1)      # plot approx
      

        # clean panels
        ax2.axhline(y=0, color='k',zorder = 0,linewidth = 0.5)
        ax2.set_xlabel('iteration',fontsize = 12)
        ax2.set_ylabel(r'$g(w)$',fontsize = 12,rotation = 0,labelpad = 25)
            
        ax.set(aspect = 'equal')
        a = ax.get_position()
        yr = ax.get_position().y1 - ax.get_position().y0
        xr = ax.get_position().x1 - ax.get_position().x0
        aspectratio=1.25*xr/yr# + min(xr,yr)
        ratio_default=(ax2.get_xlim()[1]-ax2.get_xlim()[0])/(ax2.get_ylim()[1]-ax2.get_ylim()[0])
        ax2.set_aspect(ratio_default*aspectratio)
            
        # plot
        plt.show()    
Exemplo n.º 17
0
    def fit(self, **kwargs):
        # basic parameters for gradient descent run (default algorithm);
        # stored on self since the optimizer calls below read them from there
        self.max_its = 500
        self.alpha_choice = 10**(-1)
        self.w_init = self.initializer()
        optimizer = 'gradient_descent'
        epsilon = 10**(-10)

        # set parameters by hand
        if 'max_its' in kwargs:
            self.max_its = kwargs['max_its']
        if 'alpha_choice' in kwargs:
            self.alpha_choice = kwargs['alpha_choice']
        if 'optimizer' in kwargs:
            optimizer = kwargs['optimizer']
        if 'epsilon' in kwargs:
            epsilon = kwargs['epsilon']
        if 'init' in kwargs:
            self.w_init = kwargs['init']

        # batch size for gradient descent?
        self.num_pts = np.size(self.y_train)
        self.batch_size = np.size(self.y_train)
        if 'batch_size' in kwargs:
            self.batch_size = kwargs['batch_size']

        # optimize
        weight_history = []

        # run gradient descent
        if optimizer == 'gradient_descent':
            weight_history = optimizers.gradient_descent(
                self.cost, self.alpha_choice, self.max_its, self.w_init,
                self.num_pts, self.batch_size)

        if optimizer == 'newtons_method':
            weight_history = optimizers.newtons_method(self.cost,
                                                       self.max_its,
                                                       self.w_init,
                                                       self.num_pts,
                                                       self.batch_size,
                                                       epsilon=epsilon)

        # compute training and validation cost histories
        train_cost_history = [
            self.cost(v, np.arange(np.size(self.y_train)))
            for v in weight_history
        ]
        valid_cost_history = [
            self.valid_cost(v, np.arange(np.size(self.y_valid)))
            for v in weight_history
        ]

        # store all new histories
        self.weight_histories.append(weight_history)
        self.train_cost_histories.append(train_cost_history)
        self.valid_cost_histories.append(valid_cost_history)

        # if classification produce count history
        if self.cost_name in ('softmax', 'perceptron', 'multiclass_softmax', 'multiclass_perceptron'):
            train_count_history = [self.counter(v) for v in weight_history]
            valid_count_history = [
                self.valid_counter(v) for v in weight_history
            ]

            # store count history
            self.train_count_histories.append(train_count_history)
            self.valid_count_histories.append(valid_count_history)
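
    # Hedged usage sketch (names as above; step size and iteration count
    # illustrative):
    #     model.fit(max_its=200, alpha_choice=0.1, optimizer='newtons_method')
    #     best_cost = min(model.train_cost_histories[-1])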
Exemplo n.º 18
0
    def animate_it_3d(self, w_hist, **kwargs):
        self.w_hist = w_hist

        ##### setup figure to plot #####
        # initialize figure
        fig = plt.figure(figsize=(8, 3))
        artist = fig

        # create subplot with 2 panels: surface fit on the left, cost history on the right
        gs = gridspec.GridSpec(1, 2, width_ratios=[2, 1])
        ax1 = plt.subplot(gs[0], projection='3d')
        ax2 = plt.subplot(gs[1])

        # produce color scheme
        s = np.linspace(0, 1, len(self.w_hist[:round(len(self.w_hist) / 2)]))
        s.shape = (len(s), 1)
        t = np.ones(len(self.w_hist[round(len(self.w_hist) / 2):]))
        t.shape = (len(t), 1)
        s = np.vstack((s, t))
        self.colorspec = []
        self.colorspec = np.concatenate((s, np.flipud(s)), 1)
        self.colorspec = np.concatenate((self.colorspec, np.zeros(
            (len(s), 1))), 1)

        # seed left panel plotting range
        viewmax = 3
        if 'viewmax' in kwargs:
            viewmax = kwargs['viewmax']
        r = np.linspace(-viewmax, viewmax, 200)

        # create grid from plotting range; meshgrid already returns
        # (len(r), len(r)) arrays, so no reshaping is needed
        x1_vals, x2_vals = np.meshgrid(r, r)

        # seed left panel view
        view = [20, 50]
        if 'view' in kwargs:
            view = kwargs['view']

        # set zaxis to the left
        self.move_axis_left(ax1)

        # start animation
        num_frames = len(self.w_hist)
        print('starting animation rendering...')

        def animate(k):
            # clear panels
            ax1.cla()

            # set axis in left panel
            self.move_axis_left(ax1)

            # current color
            color = self.colorspec[k]

            # print rendering update
            if np.mod(k + 1, 25) == 0:
                print('rendering animation frame ' + str(k + 1) + ' of ' +
                      str(num_frames))
            if k == num_frames - 1:
                print('animation rendering complete!')
                time.sleep(1.5)
                clear_output()

            ###### make left panel - plot data and fit ######
            # initialize fit
            w = self.w_hist[k]

            # reshape and plot the surface, as well as where the zero-plane is
            y_fit = w[0] + w[1] * x1_vals + w[2] * x2_vals

            # plot cost surface
            ax1.plot_surface(x1_vals,
                             x2_vals,
                             y_fit,
                             alpha=0.1,
                             color=color,
                             rstride=25,
                             cstride=25,
                             linewidth=0.25,
                             edgecolor='k',
                             zorder=2)

            # scatter data
            self.scatter_pts(ax1)
            #ax1.view_init(view[0],view[1])

            # plot connector between points for visualization purposes
            if k == 0:
                w_new = self.w_hist[k]
                g_new = self.least_squares(w_new)[0]
                ax2.scatter(k,
                            g_new,
                            s=0.1,
                            color='w',
                            linewidth=2.5,
                            alpha=0,
                            zorder=1)  # plot approx

            if k > 0:
                w_old = self.w_hist[k - 1]
                w_new = self.w_hist[k]
                g_old = self.least_squares(w_old)[0]
                g_new = self.least_squares(w_new)[0]

                ax2.plot([k - 1, k], [g_old, g_new],
                         color=color,
                         linewidth=2.5,
                         alpha=1,
                         zorder=2)  # plot approx
                ax2.plot([k - 1, k], [g_old, g_new],
                         color='k',
                         linewidth=3.5,
                         alpha=1,
                         zorder=1)  # plot approx

            # set viewing limits for second panel
            ax2.axhline(y=0, color='k', zorder=0, linewidth=0.5)
            ax2.set_xlabel('iteration', fontsize=12)
            ax2.set_ylabel(r'$g(\mathbf{w})$',
                           fontsize=12,
                           rotation=0,
                           labelpad=25)
            ax2.set_xlim([-0.5, len(self.w_hist)])

            # set axis in left panel
            self.move_axis_left(ax1)

            return artist,

        anim = animation.FuncAnimation(fig,
                                       animate,
                                       frames=num_frames,
                                       interval=num_frames,
                                       blit=True)

        return (anim)
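
    # Hedged usage sketch: in a notebook the returned animation can be shown
    # inline with standard matplotlib/IPython calls:
    #     anim = obj.animate_it_3d(w_hist, viewmax=4)
    #     from IPython.display import HTML
    #     HTML(anim.to_jshtml())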
Exemplo n.º 19
0
def train(x,y,**kwargs):    
    # get and run optimizer to solve two-class problem
    N = np.shape(x)[0]
    C = np.size(np.unique(y))
    max_its = 100
    alpha_choice = 1
    cost_name = 'softmax'
    w = 0.1*np.random.randn(N+1,1)
    optimizer = 'gradient_descent'
    
    # switches for user choices
    if 'max_its' in kwargs:
        max_its = kwargs['max_its']
    if 'alpha_choice' in kwargs:
        alpha_choice = kwargs['alpha_choice']
    if 'cost_name' in kwargs:
        cost_name = kwargs['cost_name']
    if 'w' in kwargs:
        w = kwargs['w']
    if 'optimizer' in kwargs:
        optimizer = kwargs['optimizer']
    epsilon = 10**(-7)
    if 'epsilon' in kwargs:
        epsilon = kwargs['epsilon']
    
    # loop over subproblems and solve
    weight_histories = []
    for c in range(0,C):
        # prepare temporary C vs notC sub-problem labels
        y_temp = copy.deepcopy(y)
        ind = np.argwhere(y_temp.astype(int) == c)
        ind = ind[:,1]
        ind2 = np.argwhere(y_temp.astype(int) != c)
        ind2 = ind2[:,1]
        y_temp[0,ind] = 1
        y_temp[0,ind2] = -1

        # store best weight for final classification 
        cost = cost_lib.choose_cost(x,y_temp,cost_name)
        
        # run optimizer
        weight_history = 0
        cost_history = 0
        if optimizer == 'gradient_descent':
            weight_history,cost_history = optimizers.gradient_descent(cost,alpha_choice,max_its,w)
        if optimizer == 'newtons_method':
            weight_history,cost_history = optimizers.newtons_method(cost,max_its,w=w,epsilon = epsilon)

        # store each weight history
        weight_histories.append(copy.deepcopy(weight_history))
        
    # combine each individual classifier weights into single weight 
    # matrix per step
    R = len(weight_histories[0])
    combined_weights = []
    for r in range(R):
        a = []
        for c in range(C):
            a.append(weight_histories[c][r])
        a = np.array(a).T
        a = a[0,:,:]
        combined_weights.append(a)
        
    # run combined weight matrices through fusion rule to calculate
    # number of misclassifications per step
    counter = cost_lib.choose_cost(x,y,'multiclass_counter')
    count_history = [counter(v) for v in combined_weights]
        
    return combined_weights, count_history
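
# Hedged usage sketch (toy call; assumes optimizers and cost_lib are
# importable as above): x is (N, P) with points as columns and y is (1, P)
# with integer labels 0..C-1, matching the indexing inside train.
#     weights, counts = train(x, y, max_its=50, alpha_choice=0.1)
#     print('final misclassifications:', counts[-1])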
Exemplo n.º 20
0
    def newtons_method(self, g, win, **kwargs):
        # flatten gradient for simpler-written descent loop
        self.g, unflatten, w = flatten_func(g, win)

        self.grad = compute_grad(self.g)
        self.hess = compute_hess(self.g)

        # parse optional arguments
        max_its = 20
        if 'max_its' in kwargs:
            max_its = kwargs['max_its']
        self.epsilon = 10**-10
        if 'epsilon' in kwargs:
            self.epsilon = kwargs['epsilon']
        verbose = True
        if 'verbose' in kwargs:
            verbose = kwargs['verbose']
        output = 'history'
        if 'output' in kwargs:
            output = kwargs['output']
        self.counter = copy.deepcopy(self.g)
        if 'counter' in kwargs:
            counter = kwargs['counter']
            self.counter, unflatten, w = flatten_func(counter, win)

        # start newton's method loop
        if verbose == True:
            print('starting optimization...')
        geval_old = self.g(w)

        # track the best weights seen so far, as measured by the cost g
        self.w_best = unflatten(copy.deepcopy(w))
        g_best = self.g(w)

        # create container for weight history
        w_hist = []
        if output == 'history':
            w_hist.append(unflatten(w))

        # loop
        for k in range(max_its):
            # compute gradient and hessian
            grad_val = self.grad(w)
            hess_val = self.hess(w)
            hess_val.shape = (np.size(w), np.size(w))

            # solve linear system for weights
            C = hess_val + self.epsilon * np.eye(np.size(w))
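            # note (added): solve(C, C.dot(w) - grad) returns w - inv(C).dot(grad),
            # i.e. a regularized Newton step with curvature C = hessian + epsilon*I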
            w = np.linalg.solve(C, np.dot(C, w) - grad_val)

            # eject from process if reaching singular system
            geval_new = self.g(w)
            if k > 2 and geval_new > geval_old:
                print('singular system reached')
                time.sleep(1.5)
                clear_output()
                if output == 'history':
                    return w_hist
                elif output == 'best':
                    return self.w_best
            else:
                geval_old = geval_new

            # record best weights seen so far
            if output == 'best':
                if geval_new < g_best:
                    g_best = geval_new
                    self.w_best = copy.deepcopy(unflatten(w))

            w_hist.append(unflatten(w))

        if verbose == True:
            print('...optimization complete!')
            time.sleep(1.5)
            clear_output()
        if output == 'best':
            return self.w_best
        elif output == 'history':
            return w_hist
Exemplo n.º 21
0
    def least_squares(self,w):
        cost = np.sum((self.model(self.x,w) - self.y)**2)
        return cost/float(np.size(self.y))
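
    # note (added): this is the mean squared error over the P points,
    # g(w) = (1/P) * sum_p (model(x_p, w) - y_p)**2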
Exemplo n.º 22
0
    pos = np.array([0,0,0])
    qM = np.eye(3)
    rs = x0[:N]
    ps = x0[N:2*N]
    ys = x0[2*N:]
    ll = link_lengths
    for r,p,y,l in zip(rs,ps,ys,ll):
        pos0 = pos
        pos,qM = fwd(pos,l,r,p,y,qM)
        ax.plot([pos0[0],pos[0]], [pos0[1],pos[1]],[pos0[2],pos[2]])
        print(pos,np.sqrt(((pos-pos0)**2).sum()),qM,'\n')
        

    # plot spheres
    for o in obstacles:
        u = np.linspace(0, 2 * np.pi, 10)
        v = np.linspace(0, np.pi, 10)
        x = o[3] * np.outer(np.cos(u), np.sin(v)) + o[0]
        y = o[3] * np.outer(np.sin(u), np.sin(v)) + o[1]
        z = o[3] * np.outer(np.ones(np.size(u)), np.cos(v)) + o[2]
        ax.plot_surface(x, y, z, color='b')

    # plot goal
    ax.scatter(target[0], target[1], target[2], c='r')

    ax.legend()
    ax.set_xlim(-6,6)
    ax.set_ylim(-6,6)
    ax.set_zlim(-6,6)
    plt.show()
Exemplo n.º 23
0
    def softmax(self,w):
        cost = np.sum(np.log(1 + np.exp(-self.y*self.model(self.x,w))))
        return cost/float(np.size(self.y))
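
    # note (added): this is the softmax / log-loss cost averaged over the P
    # points, g(w) = (1/P) * sum_p log(1 + exp(-y_p * model(x_p, w)))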
Exemplo n.º 24
0
    #( data, loss_func, mn, vn, b1, b2, batch_size, learning_rate, iteration)
    adam = AdamOptimizer(train_x, train_y_exact, w1, w2, loss_func,
                         tuning_params)  #create object
    adam.find_weights()  #train
    (final_w1, final_w2) = adam.get_weights()  #get trained weights

    test_data_x = get_test_data(all_data)  #get test data
    test_data_x = normalize(test_data_x)  #standardize data
    test_data_desired = get_test_data_result(
        all_data)  #get exact outputs for test data

    test_data_predict = forward_pass(
        test_data_x, final_w1, final_w2
    )  # run forward pass using the two trained weight matrices (the
       # original also passed final_w3/final_w4, which were never defined)

    test_data_size = np.size(test_data_predict, 0)
    for i in range(test_data_size):
        if test_data_predict[i] < 0.5:
            test_data_predict[i] = 0
        else:
            test_data_predict[i] = 1

    confuse = confusion_matrix(test_data_desired, test_data_predict)
    accuracy = confuse.trace() / confuse.sum()
    print(accuracy)

    plt.figure(1, figsize=(15, 6))
    plt.subplot(1, 2, 1)
    x_axis = np.arange(np.size(adam.loss_array, 0))
    plt.plot(x_axis, adam.loss_array, linewidth=3.0, label='loss ')
    plt.legend()
Exemplo n.º 25
0
    def seir(self, y, t, parameters, controls, stochastic=False):
        # define the right hand side of the ode systems given state y, time t, parameters, and controls

        if self.number_group > 1:
            y = y.reshape((10, self.number_group))

        S, E, Q, A, I, H, R, D, Tc, Tu = y

        # q, tau, HFR, kappa, beta, delta, sigma, eta_I, eta_Q, mu, gamma_I, gamma_A, gamma_H, gamma_Q = parameters
        # _, _, _, delta, sigma, eta_I, eta_Q, mu, gamma_I, gamma_A, gamma_H, gamma_Q = parameters
        alpha, q, tau, HFR, kappa, beta, delta, sigma, eta_I, eta_Q, mu, gamma_I, gamma_A, gamma_H, gamma_Q = controls
        # alpha, q, tau, HFR, kappa, beta, _, _, _, _, _, _, _, _, _ = controls

        # alpha = (np.tanh(alpha) + 1)/2
        alpha = self.interpolation(t, self.t_control, alpha)

        # alpha, q, tau, HFR, kappa = [self.interpolation(t, self.t_control, controls[i]) for i in range(self.number_time_dependent_controls)]

        # tau_p = self.interpolation(t - np.max(1./sigma), self.t_control, controls[2])
        # tau_p = self.interpolation(t, self.t_control, controls[2])
        # tau_p = tau

        # IHR = np.divide(kappa, np.max(kappa) + tau_p)

        IHR = kappa

        QHR = ewm(tau, IHR)

        # gamma_A = gamma_I

        pi = self.proportion2factor(IHR, eta_I, gamma_I)
        nu = self.proportion2factor(HFR, mu, gamma_H)
        rho = self.proportion2factor(QHR, eta_Q, gamma_Q)

        contact = self.contact_rate(t)

        # theta_I = 2 - tau
        # theta_A = 1 - tau

        theta_I = 1. - 0 * tau
        theta_A = 1. - 0 * tau
        delta = 1. + 0 * delta

        C_E = ewm(
            1 - alpha,
            ewm(
                1 - q,
                ewm(delta,
                    np.dot(contact, ewm(theta_I, np.divide(I, self.N_total)))))
            + np.dot(contact, ewm(theta_A, np.divide(A, self.N_total))))

        C_Q = ewm(
            1 - alpha,
            ewm(
                q,
                ewm(delta,
                    np.dot(contact, ewm(theta_I, np.divide(I,
                                                           self.N_total))))))

        if stochastic:
            # clamp states at zero before drawing Poisson counts
            S = np.maximum(S, 0.)
            E = np.maximum(E, 0.)
            Q = np.maximum(Q, 0.)
            A = np.maximum(A, 0.)
            I = np.maximum(I, 0.)
            H = np.maximum(H, 0.)

        P1 = ewm(beta, ewm(C_E, S))
        P2 = ewm(beta, ewm(C_Q, S))
        P3 = ewm(tau, ewm(sigma, E))
        P4 = ewm(1 - tau, ewm(sigma, E))
        P5 = ewm(rho, ewm(eta_Q, Q))
        P6 = ewm(1 - rho, ewm(gamma_Q, Q))
        P7 = ewm(gamma_A, A)
        P8 = ewm(pi, ewm(eta_I, I))
        P9 = ewm(1 - pi, ewm(gamma_I, I))
        P10 = ewm(nu, ewm(mu, H))
        P11 = ewm(1 - nu, ewm(gamma_H, H))

        if stochastic:
            P1 = np.random.poisson(P1)
            P2 = np.random.poisson(P2)
            P3 = np.random.poisson(P3)
            P4 = np.random.poisson(P4)
            P5 = np.random.poisson(P5)
            P6 = np.random.poisson(P6)
            P7 = np.random.poisson(P7)
            P8 = np.random.poisson(P8)
            P9 = np.random.poisson(P9)
            P10 = np.random.poisson(P10)
            P11 = np.random.poisson(P11)

        dS = -P1 - P2
        dE = P1 - P3 - P4
        dQ = P2 - P5 - P6
        dA = P4 - P7
        dI = P3 - P8 - P9
        dH = P8 + P5 - P10 - P11
        dR = P7 + P9 + P11 + P6
        dD = P10
        dTc = P3 + P2  # + quarantined, P2
        dTu = P4
        dydt = np.array([dS, dE, dQ, dA, dI, dH, dR, dD, dTc,
                         dTu]).flatten("C")

        return dydt
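
    # Hedged sketch: one deterministic solve of this right-hand side with
    # scipy (model, parameters, controls, and y0 assumed constructed
    # elsewhere; the horizon is illustrative):
    #     from scipy.integrate import odeint
    #     t = np.linspace(0, 180, 181)
    #     sol = odeint(model.seir, y0, t, args=(parameters, controls))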
Exemplo n.º 26
0
def get_test_data(data):  # get all data inputs for testing from the csv file
    # rows after the first two-thirds (the training split) form the test set
    n = np.size(data, 0)
    n = int(np.round(2 * n / 3))
    test_data = data[n:, :-1]
    return test_data
Exemplo n.º 27
0
    def point_and_projection(self,point1,point2):
        # generate range for viewing limits
        minx = min(min(self.x[:,0]),min(self.x[:,1]))
        maxx = max(max(self.x[:,0]),max(self.x[:,1]))
        gapx = (maxx - minx)*0.1
        minx -= gapx
        maxx += gapx

        # initialize figure
        fig = plt.figure(figsize = (8,4))
        gs = gridspec.GridSpec(1, 2,width_ratios = [1,1])

        # setup current axes
        ax = plt.subplot(gs[0],aspect = 'equal')
        ax2 = plt.subplot(gs[1],aspect = 'equal')

        # the two panels differ only in the axis drawn to and the query
        # point, so the per-panel work is factored into a single helper
        def plot_panel(axis,point_in):
            # plot data and separators
            self.plot_data(axis)
            self.plot_all_separators(axis)

            # determine which classifiers evaluate the point positively
            point = np.asarray([1] + point_in)
            point.shape = (len(point),1)
            y = np.dot(self.W,point)
            ind = np.argwhere(y > 0)
            if np.size(ind) == 0:
                num_classes = len(np.unique(self.y))
                ind = np.arange(num_classes).tolist()
            else:
                ind = [v[0] for v in ind]
            point = point[1:]
            axis.scatter(point[0],point[1],c = 'k',edgecolor = 'w',linewidth = 1,s = 90)

            # loop over those classifiers and project the point onto each boundary
            for i in ind:
                # get weights
                w = np.asarray(self.W[i])
                w.shape = (len(w),1)
                w_norm = sum([v**2 for v in w[1:]])

                # make projected point
                add_on = w[0] + sum([v*a for v,a in zip(point,w[1:])])
                add_on /= w_norm
                proj_point = copy.deepcopy(point)
                proj_point -= add_on*w[1:]

                # projected point
                axis.scatter(proj_point[0],proj_point[1],c = self.colors[i],edgecolor = 'k',linewidth = 1,s = 60,zorder = 4,marker = 'X')

                # dashed line from query point to its projection
                l = np.linspace(proj_point[0],point[0],200)
                b = np.linspace(proj_point[1],point[1],200)
                axis.plot(l,b,linewidth = 1,linestyle = '--',color = 'k',zorder = 3)

            # dress panel
            axis.set_xlim(minx,maxx)
            axis.set_ylim(minx,maxx)
            axis.axis('off')

        plot_panel(ax,point1)
        plot_panel(ax2,point2)
Exemplo n.º 28
0
def train(x, y, feature_transforms, **kwargs):
    # get and run optimizer to solve two-class problem
    N = np.shape(x)[0]
    C = np.size(np.unique(y))
    max_its = 100
    alpha_choice = 1
    cost_name = 'softmax'
    normalize = 'standard'
    w = 0.1 * np.random.randn(N + 1, 1)

    # switches for user choices
    if 'max_its' in kwargs:
        max_its = kwargs['max_its']
    if 'alpha_choice' in kwargs:
        alpha_choice = kwargs['alpha_choice']
    if 'cost_name' in kwargs:
        cost_name = kwargs['cost_name']
    if 'w' in kwargs:
        w = kwargs['w']
    if 'normalize' in kwargs:
        normalize = kwargs['normalize']

    # loop over subproblems and solve
    weight_histories = []
    for c in range(0, C):
        # prepare temporary C vs notC sub-problem labels
        y_temp = copy.deepcopy(y)
        ind = np.argwhere(y_temp.astype(int) == c)
        ind = ind[:, 0]
        ind2 = np.argwhere(y_temp.astype(int) != c)
        ind2 = ind2[:, 0]
        y_temp[ind] = 1
        y_temp[ind2] = -1

        # run on normalized data
        run = basic_runner.Setup(x,
                                 y_temp,
                                 feature_transforms,
                                 cost_name,
                                 normalize=normalize)
        run.fit(w=w, alpha_choice=alpha_choice, max_its=max_its)

        # store each weight history
        weight_histories.append(run.weight_history)

    # combine each individual classifier weights into single weight
    # matrix per step
    R = len(weight_histories[0])
    combined_weights = []
    for r in range(R):
        a = []
        for c in range(C):
            a.append(weight_histories[c][r])
        a = np.array(a).T
        a = a[0, :, :]
        combined_weights.append(a)

    # run combined weight matrices through fusion rule to calculate
    # number of misclassifications per step
    counter = basic_runner.Setup(x,
                                 y,
                                 feature_transforms,
                                 'multiclass_counter',
                                 normalize=normalize).cost_func
    count_history = [counter(v) for v in combined_weights]

    return combined_weights, count_history
Exemplo n.º 29
0
    def solve_2class_subproblems(self,**kwargs):
        # parse args
        max_its = 5
        if 'max_its' in kwargs:
            max_its = kwargs['max_its']
        alpha = 10**-3
        if 'alpha' in kwargs:
            alpha = kwargs['alpha']  
        steplength_rule = 'none'
        if 'steplength_rule' in kwargs:
            steplength_rule = kwargs['steplength_rule']
        version = 'unnormalized'
        if 'version' in kwargs:
            version = kwargs['version'] 
        algo = 'newtons_method'
        if 'algo' in kwargs:
            algo = kwargs['algo']
         
        #### perform all optimizations ###
        self.g = self.softmax
        if 'cost' in kwargs:
            cost = kwargs['cost']
            if cost == 'softmax':
                self.g = self.softmax
            if cost == 'relu':
                self.g = self.relu

        # loop over subproblems and solve
        self.W = []
        num_classes = np.size(np.unique(self.y))
        for i in range(0,num_classes):
            #print ('solving sub-problem number ' + str(i+1))
            # prepare temporary C vs notC sub-problem labels
            self.y_temp = copy.deepcopy(self.y)
            ind = np.argwhere(self.y_temp == (i))
            ind = ind[:,0]
            ind2 = np.argwhere(self.y_temp != (i))
            ind2 = ind2[:,0]
            self.y_temp[ind] = 1
            self.y_temp[ind2] = -1

            # solve the current subproblem
            if algo == 'gradient_descent':  # run gradient descent
                w_hist = self.opt.gradient_descent(g = self.g,
                                                   w = np.random.randn(np.shape(self.x)[1]+1,1),
                                                   version = version,
                                                   max_its = max_its,
                                                   alpha = alpha,
                                                   steplength_rule = steplength_rule)
            elif algo == 'newtons_method':
                w_hist = self.opt.newtons_method(g = self.g,
                                                 w = np.random.randn(np.shape(self.x)[1]+1,1),
                                                 max_its = max_its,
                                                 epsilon = 10**(-5))
            
            # store best weight for final classification 
            g_count = []
            for j in range(len(w_hist)):
                w = w_hist[j]
                gval = self.g(w)
                g_count.append(gval)
            ind = np.argmin(g_count)
            w = w_hist[ind]
            
            # normalize normal vectors for each classifier
            w_norm = sum([v**2 for v in w[1:]])**(0.5)
            w_1N = [v/w_norm for v in w]
            self.W.append(w_1N)
            
        # reshape
        self.W = np.asarray(self.W)
        self.W.shape = (num_classes,np.shape(self.x)[1] + 1)
Exemplo n.º 30
0
    def static_fig(self, w_hist, **kwargs):
        self.w_hist = w_hist
        ind = -1
        show_path = True
        if np.size(w_hist) == 0:
            show_path = False
        w = 0
        if show_path:
            w = w_hist[ind]

        ##### setup figure to plot #####
        # initialize figure
        fig = plt.figure(figsize=(8, 3))
        artist = fig

        # create subplot with 2 panels: data and fit on the left, contour on the right
        gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1])
        ax1 = plt.subplot(gs[0])
        ax2 = plt.subplot(gs[1])

        # produce color scheme
        s = np.linspace(0, 1, len(self.w_hist[:round(len(self.w_hist) / 2)]))
        s.shape = (len(s), 1)
        t = np.ones(len(self.w_hist[round(len(self.w_hist) / 2):]))
        t.shape = (len(t), 1)
        s = np.vstack((s, t))
        self.colorspec = []
        self.colorspec = np.concatenate((s, np.flipud(s)), 1)
        self.colorspec = np.concatenate((self.colorspec, np.zeros(
            (len(s), 1))), 1)

        # seed left panel plotting range
        xmin = copy.deepcopy(min(self.x))
        xmax = copy.deepcopy(max(self.x))
        xgap = (xmax - xmin) * 0.1
        xmin -= xgap
        xmax += xgap
        x_fit = np.linspace(xmin, xmax, 300)

        # seed right panel contour plot
        viewmax = 3
        if 'viewmax' in kwargs:
            viewmax = kwargs['viewmax']
        view = [20, 100]
        if 'view' in kwargs:
            view = kwargs['view']
        num_contours = 15
        if 'num_contours' in kwargs:
            num_contours = kwargs['num_contours']

        ### contour plot in right panel ###
        self.contour_plot(ax2, viewmax, num_contours)

        ### make left panel - plot data and fit ###
        # scatter data
        self.scatter_pts(ax1)

        if show_path:
            # initialize fit
            y_fit = np.tanh(w[0] + x_fit * w[1])

            # plot fit to data
            color = self.colorspec[-1]
            ax1.plot(x_fit, y_fit, color=color, linewidth=2)

            # add points to right panel contour plot
            num_frames = len(self.w_hist)
            for k in range(num_frames):
                # current color
                color = self.colorspec[k]

                # current weights
                w = self.w_hist[k]

                ###### make right panel - plot contour and steps ######
                if k == 0:
                    ax2.scatter(w[0],
                                w[1],
                                s=90,
                                facecolor=color,
                                edgecolor='k',
                                linewidth=0.5,
                                zorder=3)
                if k > 0 and k < num_frames:
                    self.plot_pts_on_contour(ax2, k, color)
                if k == num_frames - 1:
                    ax2.scatter(w[0],
                                w[1],
                                s=90,
                                facecolor=color,
                                edgecolor='k',
                                linewidth=0.5,
                                zorder=3)

        plt.show()
Exemplo n.º 31
0
    def conv_layer_testing(self, tensor, kernels, stats):
        # square up tensor into tensor of patches
        tensor = tensor.reshape(np.shape(tensor)[0],
                                int((np.shape(tensor)[1])**(0.5)),
                                int((np.shape(tensor)[1])**(0.5)),
                                order='F')

        # pad tensor
        kernel = kernels[0]
        padded_tensor = self.pad_tensor(tensor, kernel)

        # window tensor
        wind_tensor = self.sliding_window_tensor(padded_tensor,
                                                 kernel,
                                                 stride=1)

        # normalize windows since they touch weights
        a_means = 0
        a_stds = 0
        if np.size(stats) == 0:
            a_means = np.mean(wind_tensor, axis=0)
            a_stds = np.std(wind_tensor, axis=0)
            stats = [a_means, a_stds]
        else:
            # stats was stored above as [a_means, a_stds]
            a_means = stats[0]
            a_stds = stats[1]
        wind_tensor = self.normalize(wind_tensor, a_means, a_stds)

        #### compute convolution feature maps / downsample via pooling one map at a time over entire tensor #####
        kernel2 = np.ones((6, 6))
        stride = 3
        new_tensors = []
        for kernel in kernels:
            #### make convolution feature map - via matrix multiplication over windowed tensor
            feature_map = np.dot(wind_tensor, kernel.flatten()[:, np.newaxis])

            # reshape convolution feature map into array
            feature_map.shape = (np.shape(tensor))
            feature_map = np.asarray(feature_map)

            # now shove result through nonlinear activation
            feature_map = self.activation(feature_map)

            #### now pool / downsample feature map, first window then pool on each window
            wind_featmap = self.sliding_window_tensor(feature_map,
                                                      kernel2,
                                                      stride=stride)

            # max pool on each collected patch
            max_pool = np.max(wind_featmap, axis=1)

            # reshape into new tensor
            max_pool.shape = (np.shape(tensor)[0],
                              int((np.shape(max_pool)[0] /
                                   float(np.shape(tensor)[0]))**(0.5)),
                              int((np.shape(max_pool)[0] /
                                   float(np.shape(tensor)[0]))**(0.5)))

            # reshape into new downsampled pooled feature map
            new_tensors.append(max_pool)

        # turn into array
        new_tensors = np.asarray(new_tensors)

        # reshape into final feature vector to touch fully connected layer(s), otherwise keep as is in terms of shape
        new_tensors = new_tensors.swapaxes(0, 1)
        new_tensors = np.reshape(
            new_tensors, (np.shape(new_tensors)[0], np.shape(new_tensors)[1],
                          np.shape(new_tensors)[2] * np.shape(new_tensors)[3]))
        new_tensors = np.reshape(
            new_tensors, (np.shape(new_tensors)[0],
                          np.shape(new_tensors)[1] * np.shape(new_tensors)[2]),
            order='F')

        return new_tensors, stats
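
    # Design note (added): the layer expresses convolution as a single matrix
    # product between an im2col-style windowed tensor and the flattened
    # kernel, then max-pools each feature map with a 6x6 window at stride 3;
    # window statistics are computed once (when stats is empty) and reused on
    # later calls.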
Exemplo n.º 32
0
def newtons_method(g, max_its, w, **kwargs):
    # flatten input function, in case it takes in matrices of weights
    flat_g, unflatten, w = flatten_func(g, w)

    # compute the gradient / hessian functions of our input function -
    # note these are themselves functions.  In particular the gradient -
    # when evaluated - returns both the gradient and function evaluations
    # (as discussed in Chapter 3 we always get the function evaluation
    # 'for free' when we use an Automatic Differentiator to evaluate the
    # gradient)
    gradient = value_and_grad(flat_g)
    hess = hessian(flat_g)

    # set numerical stability / regularization parameter
    epsilon = 10**(-7)
    if 'epsilon' in kwargs:
        epsilon = kwargs['epsilon']

    # run the newtons method loop
    weight_history = []  # container for weight history
    cost_history = []  # container for corresponding cost function history
    for k in range(max_its):
        # evaluate the gradient, store current weights and cost function value
        cost_eval, grad_eval = gradient(w)
        weight_history.append(unflatten(w))
        cost_history.append(cost_eval)

        # evaluate the hessian
        hess_eval = hess(w)

        # reshape for numpy linalg functionality
        hess_eval.shape = (int(
            (np.size(hess_eval))**(0.5)), int((np.size(hess_eval))**(0.5)))

    # solve second-order system for weight update
        w = w - np.dot(
            np.linalg.pinv(hess_eval + epsilon * np.eye(np.size(w))),
            grad_eval)

    # collect final weights
    weight_history.append(unflatten(w))
    # compute final cost function value via g itself (since we aren't
    # computing the gradient at the final step we don't get the final cost
    # function value via the Automatic Differentiator)
    cost_history.append(flat_g(w))
    return weight_history, cost_history
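
# Hedged usage sketch (toy quadratic; flatten_func / value_and_grad / hessian
# from autograd assumed imported as in this example). On a quadratic a single
# Newton step essentially lands on the minimizer:
#     g = lambda w: np.sum(w**2)
#     w_hist, cost_hist = newtons_method(g, max_its=3, w=np.array([1.0, -2.0]))
#     print(cost_hist)   # near zero from the first update onward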

    # gradient descent function - inputs: g (input function), alpha_choice
    # (steplength parameter or 'diminishing'), max_its (maximum number of
    # iterations), w (initialization)
    def gradient_descent(g, alpha_choice, max_its, w):
        # compute the gradient function of our input function - note this is
        # itself a function that - when evaluated - returns both the gradient
        # and the function evaluation (as discussed in Chapter 3 we always
        # get the function evaluation 'for free' when we use an Automatic
        # Differentiator to evaluate the gradient)
        gradient = value_and_grad(g)

        # run the gradient descent loop
        weight_history = []  # container for weight history
        cost_history = []  # container for corresponding cost function history
        alpha = 0
        for k in range(1, max_its + 1):
            # check if diminishing steplength rule used
            if alpha_choice == 'diminishing':
                alpha = 1 / float(k)
            else:
                alpha = alpha_choice

            # evaluate the gradient, store current weights and cost function value
            cost_eval, grad_eval = gradient(w)
            weight_history.append(w)
            cost_history.append(cost_eval)

            # take gradient descent step
            w = w - alpha * grad_eval

        # collect final weights
        weight_history.append(w)
        # compute final cost function value via g itself (since we aren't
        # computing the gradient at the final step we don't get the final
        # cost function value via the Automatic Differentiator)
        cost_history.append(g(w))
        return weight_history, cost_history
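
    # Hedged usage sketch (toy quadratic; 'diminishing' selects the 1/k
    # steplength branch above):
    #     w_hist, cost_hist = gradient_descent(lambda w: np.sum(w**2),
    #                                          'diminishing', 25, np.array([3.0]))
    #     print(cost_hist[-1])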