Example #1
    def gradient_check(self):

        backend_backup = nn.backend
        nn.set_backend("numpy")
        assert not self.cfg.want_dropout
        # assert self.cfg[-1].activation == nn.softmax
        assert nn.backend == nn.NumpyBackend

        # Build a small random input batch matching the first layer's shape.
        if self.cfg[0].type == "convolution":
            x = nn.randn((2, self.cfg[0].shape[0], self.cfg[0].shape[1], self.cfg[0].shape[2]))
        else:
            x = nn.randn((2, self.cfg[0].shape))

        if self.cfg[self.size - 1].type == "dense":
            # Because the cross-entropy cost uses a trick to avoid NaN,
            # the target values must be exactly 0 or 1.
            t = nn.zeros((2, self.cfg[-1].shape))
            t[0, np.random.randint(self.cfg[-1].shape)] = 1
            t[1, np.random.randint(self.cfg[-1].shape)] = 1
            # row_sums = t.sum(axis=1); t = t / row_sums[:, np.newaxis]  # for softmax gradient checking, rows should sum to one.
        else:
            t = nn.randn((2, self.cfg[-1].shape[0], self.cfg[-1].shape[1], self.cfg[-1].shape[2]))

        epsilon = 1e-5
        for k in range(self.size):
            if self.cfg[k].type == "dense":
                self.weights.randn(.01)
                self.compute_grad(x, t)  # NOTE: recomputing the analytical gradient on every iteration is probably unnecessary.

                if k == 0: continue
                wk, bk = self.weights[k]
                dwk, dbk = self.dweights[k]
                # Each printed ratio (numerical / analytical derivative) should be close to 1.
                f = self.feedforward(x, t)
                wk[0, 0] += epsilon
                f_new = self.feedforward(x, t)
                df = f_new - f
                print k, df / epsilon / dwk[0, 0]
                f = self.feedforward(x, t)
                bk[0, 0] += epsilon
                f_new = self.feedforward(x, t)
                df = f_new - f
                print k, df / epsilon / dbk[0, 0]
            elif self.cfg[k].type in ("convolution", "deconvolution"):
                self.weights.randn(.01)
                self.compute_grad(x, t)

                if k == 0: continue
                wk, bk = self.weights[k]
                dwk, dbk = self.dweights[k]
                f = self.feedforward(x, t)
                wk[0, 0, 2, 0] += epsilon
                f_new = self.feedforward(x, t)
                df = f_new - f
                print k, df / epsilon / dwk[0, 0, 2, 0]
                f = self.feedforward(x, t)
                bk[0, 0] += epsilon
                f_new = self.feedforward(x, t)
                df = f_new - f
                print k, df / epsilon / dbk[0, 0]

        # Restore the backend that was active before the check.
        nn.backend = backend_backup
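
The ratios printed above are the forward-difference estimate (f(w + eps) - f(w)) / eps divided by the analytical derivative, so a correct backward pass should print values close to 1. Below is a minimal, self-contained sketch of the same check using plain NumPy and a toy squared-error loss; the names loss and analytic_grad are illustrative and not part of the library used above.

import numpy as np

def loss(w, x, t):
    # Toy objective: squared error of a linear model.
    return 0.5 * np.sum((x.dot(w) - t) ** 2)

def analytic_grad(w, x, t):
    # Closed-form gradient of the loss above.
    return x.T.dot(x.dot(w) - t)

np.random.seed(0)
x = np.random.randn(4, 3)
t = np.random.randn(4)
w = np.random.randn(3)

eps = 1e-5
g = analytic_grad(w, x, t)
for i in range(w.size):
    w_eps = w.copy()
    w_eps[i] += eps
    numerical = (loss(w_eps, x, t) - loss(w, x, t)) / eps
    # As in gradient_check, each ratio should be close to 1.
    print(numerical / g[i])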
Example #2
    def __init__(self, cfg, initial_weights=None):
        self.cfg = cfg
        self.size = len(cfg.num_weights)
        if initial_weights is None:
            # The buffer has to be allocated here because init_weights(initial_weights)
            # later unpacks wk, bk = self.weights[k] from it.
            self.mem = nn.zeros(self.cfg.num_parameters)
        else:
            self.mem = initial_weights

        self.weights = [(None, None)]
        for k in range(1, len(cfg)):
            self.weights.append(self.get_weights(k))
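
Note the initial_weights is None test: when the argument may be a NumPy array, comparing with == None is evaluated elementwise and cannot be used as a plain boolean. A small standalone illustration, independent of the class above:

import numpy as np

w = np.zeros(4)
print(w is None)       # False: identity check gives a single boolean
print(w == None)       # elementwise comparison, an array of False values
try:
    if w == None:      # using that array as a condition is ambiguous
        pass
except ValueError as err:
    print(err)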
Example #3
    def __init__(self, shape, tied_list=None, tied_type="first"):
        self.shape = shape
        self.index = []

        # Map each tied (layer, weight) pair to the (layer, weight) pair whose storage it shares.
        tied_dict = {}
        if tied_list:
            for i, j in tied_list:
                if tied_type == "all":
                    assert shape[i] == shape[j]
                    for k in xrange(len(shape[i])):
                        tied_dict[(j, k)] = (i, k)
                elif tied_type == "first":
                    print shape[i][0]
                    print shape[j][0]
                    assert shape[i][0] == shape[j][0]
                    tied_dict[(j, 0)] = (i, 0)

        i = 0
        for layer_index in xrange(len(self.shape)):
            layer = self.shape[layer_index]
            if layer in [None, [None], [None, None]]:
                self.index.append(None)
                continue
            self.index.append([None] * len(layer))
            for w_index in xrange(len(layer)):
                w = layer[w_index]

                tied = tied_dict.get((layer_index, w_index))
                if tied is None:
                    # Untied weights claim a fresh (start, end) slice of the flat buffer.
                    self.index[layer_index][w_index] = (i, i + WeightSet.prod(w))
                    i += WeightSet.prod(w)
                else:
                    # Tied weights reuse the slice of the entry they are tied to,
                    # so both views share the same storage in self.mem.
                    self.index[layer_index][w_index] = self.index[tied[0]][tied[1]]

        self.num_weights = self.index[-1][-1][-1]
        self.mem = nn.zeros(self.num_weights)

        self.weights = []
        for layer_index in xrange(len(self.shape)):
            self.weights.append(self.get_weights(layer_index))
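
A minimal sketch of what the tied-weight bookkeeping above achieves, using plain NumPy and independent of the WeightSet class: two entries that record the same (start, end) slice of the flat buffer become views of the same storage, so writing through one view is visible through the other.

import numpy as np

# Flat parameter buffer, analogous to WeightSet.mem.
mem = np.zeros(10)

# (start, end) slice per weight; the third entry is "tied" to the first,
# so it records the same slice instead of claiming new storage.
index = [(0, 4), (4, 10), (0, 4)]

w0 = mem[index[0][0]:index[0][1]].reshape(2, 2)
w2 = mem[index[2][0]:index[2][1]].reshape(2, 2)

w0[0, 0] = 3.14
print(w2[0, 0])   # 3.14 -- both views alias the same memory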