Пример #1
0
    def next(self):
        """
        Return the next minibatch as a DataFold and advance the iterator.

        The row range of the batch is taken from self._batches[self._index].
        When the sliced inputs have dtype uint8 they are copied into a buffer
        (allocated once through bm.empty and reused on later calls) and that
        copy is passed to bm.imul with the factor 1/255; inputs of any other
        dtype are used exactly as sliced from self._X.

        Raises StopIteration once self._index has reached len(self._batches).
        """
        if self._index >= len(self._batches):
            raise StopIteration
        s = slice(*(self._batches[self._index]))
        Xbuf = self._X[s,:]
        if Xbuf.dtype == 'uint8':
            # Identity test instead of `== None`: array types may overload
            # `==` elementwise, which makes the result unusable in an `if`.
            if self._Xbuf is None:
                self._Xbuf = bm.empty((self._batchsize,self._X.shape[1]))
            nrows = s.stop-s.start
            self._Xbuf[:nrows,:] = Xbuf  # copy
            Xbuf = self._Xbuf[:nrows,:]  # point to copy
            bm.imul(Xbuf,1./255)

        Y = self._Y[s,:]
        # self._S is optional; when it is absent the fold receives None.
        S = self._S[s,:] if self._S is not None else None
        batch = DataFold(Xbuf,Y,S)
        self._index += 1
        return batch
Пример #2
0
    def next(self):
        """
        Produce the next minibatch (a DataFold) and step the iterator forward.

        self._batches[self._index] supplies the arguments of the row slice.
        If the sliced inputs are uint8, they are copied into self._Xbuf (a
        buffer created on first use with bm.empty) and the copy is handed to
        bm.imul together with the factor 1/255.  Inputs with any other dtype
        are forwarded unchanged.

        Raises StopIteration when no batches remain.
        """
        if self._index >= len(self._batches):
            raise StopIteration
        s = slice(*(self._batches[self._index]))
        Xbuf = self._X[s, :]
        if Xbuf.dtype == 'uint8':
            # `is None` rather than `== None`: comparing an array-like buffer
            # by value may be evaluated elementwise instead of yielding a bool.
            if self._Xbuf is None:
                self._Xbuf = bm.empty((self._batchsize, self._X.shape[1]))
            count = s.stop - s.start
            self._Xbuf[:count, :] = Xbuf  # copy
            Xbuf = self._Xbuf[:count, :]  # point to copy
            bm.imul(Xbuf, 1. / 255)

        targets = self._Y[s, :]
        # self._S is optional; pass None through when it was never set.
        extra = self._S[s, :] if self._S is not None else None
        batch = DataFold(Xbuf, targets, extra)
        self._index += 1
        return batch
Пример #3
0
 def _collect_stats_on_batch(self,batch):
     '''
     Given a particular batch, evaluate the model on it and collect
     statistics about how the model is performing.

     Returns a dict with the keys "X", "Y", "S", "H", "loss", "regularizer"
     and "penalty"; when the trainer's task is "classification" it also
     contains "error rate", expressed as a percentage of batch.size.
     '''
     X,Y = batch
     model = self.trainer().model
     H = model.eval(batch,want_hidden=True)
     stats = {}
     stats["X"]           = bm.as_numpy(X).copy()
     # When inputs and targets are the same object, share the single copy.
     stats["Y"]           = bm.as_numpy(Y).copy() if X is not Y else stats["X"]
     # Identity test: `!= None` on an array-like may be evaluated elementwise
     # and then cannot be used as a condition.
     if batch.S is not None:
         stats["S"]       = bm.as_numpy(batch.S).copy()
     else:
         stats["S"]       = zeros((0,1),dtype='float32')
     stats["H"]           = [bm.as_numpy(Hi).copy() for Hi in H]  # make a copy of hidden activations
     stats["loss"]        = model.loss(H[-1],Y)  # scalar loss value
     stats["regularizer"] = model.regularizer(H) # scalar hidden unit regularization penalty
     stats["penalty"]     = model.penalty()      # scalar weight penalty
     if self.trainer().task() == "classification":
         # Percentage of rows whose arg-max output differs from the arg-max target.
         stats["error rate"] = 100*count_nonzero(array(argmax(H[-1],axis=1)) != argmax(Y,axis=1)) / float(batch.size)
     return stats
Пример #4
0
 def __init__(self,X,Y,Xshape=None,Yshape=None,Xrange=None,Yrange=None,shuffle=True,max_batchsize=1):
     '''
     Set up the dataset from inputs X and targets Y.

     Xshape/Yshape default to (1, number of columns); Xrange/Yrange default
     to the per-column (min, max) of the data.  All rows are placed in the
     "train" fold; the "valid" and "test" folds are created from zero-row
     slices.

     Note: when shuffle is true, X and Y are permuted in place, so the
     arrays passed in by the caller are modified.
     '''
     if shuffle:
         order = random.permutation(X.shape[0])
         X[:] = X[order]
         Y[:] = Y[order]

     self._X = bm.asarray(X)
     self._Y = bm.asarray(Y)

     num_rows = X.shape[0]
     x_cols = X.shape[1]
     y_cols = Y.shape[1]
     self._size = num_rows
     self.max_batchsize = max_batchsize
     self.Xshape = Xshape if Xshape else (1,x_cols)
     self.Yshape = Yshape if Yshape else (1,y_cols)
     self.Xdim = x_cols
     self.Ydim = y_cols
     self.Xrange = Xrange if Xrange else (X.min(axis=0),X.max(axis=0))
     self.Yrange = Yrange if Yrange else (Y.min(axis=0),Y.max(axis=0))

     # Every row goes to the training fold; the other folds hold no rows.
     all_rows = self._rowslice(0,self._size)
     no_rows = slice(0,0)
     self.train = DataFold(X[all_rows,:],Y[all_rows,:])
     self.valid = DataFold(X[no_rows,:],Y[no_rows,:])
     self.test = DataFold(X[no_rows,:],Y[no_rows,:])
Пример #5
0
 def log(self, event, stats):
     """
     Refresh the cached feature array from the model's current weights.

     event -- name of the logging event; the feature ordering is only
              recomputed on "epoch" events.
     stats -- dict of statistics; stats["epoch"] is read on "epoch" events
              when sorting is enabled.

     Sets self._feat, self._featrange and self._dirty, and while
     stats["epoch"] < 5 (with self._sorted set) recomputes self._ordering.
     """
     if self._direction == "input":
         # Filters going into first layer of hidden units
         W = bm.as_numpy(self._model.weights[0].W).copy()
     else:
         # Templates going out of final layer of hidden units
         W = bm.as_numpy(self._model.weights[-1].W).copy().transpose()
     # The trailing axis indexes the individual features.
     W = W.reshape(self._featshape + tuple([-1]))
     self._feat = W  # col[i] contains weights entering unit i in first hidden layer
     self._featrange = (min(W.ravel()), max(W.ravel()))
     self._dirty = True
     if event == "epoch" and self._sorted and (stats["epoch"] < 5):
         # Sort by decreasing L2 norm
         ranks = [-sum(self._feat[:, :, j].ravel() ** 2) for j in range(self._feat.shape[2])]
         self._ordering = argsort(ranks)
     # Identity test: once _ordering holds the argsort result, `!= None`
     # compares an array by value and does not yield a usable condition.
     if self._ordering is not None:
         self._feat = self._feat[:, :, self._ordering]
0
    def __init__(self,
                 X,
                 Y,
                 Xshape=None,
                 Yshape=None,
                 Xrange=None,
                 Yrange=None,
                 shuffle=True,
                 max_batchsize=1):
        """
        Set up the dataset from inputs X and targets Y.

        X, Y          -- either arrays, or dicts holding 'train' and 'test'
                         entries.  Passing the same object for both marks
                         inputs and targets as shared.
        Xshape/Yshape -- default to (1, number of columns).
        Xrange/Yrange -- default to the per-column (min, max) of the
                         training data.
        shuffle       -- permute the training rows (into new arrays; the
                         caller's arrays are left untouched).
        max_batchsize -- stored as self.max_batchsize.

        All training rows go to the "train" fold, "valid" is created from a
        zero-row slice, and "test" holds the 'test' entries (or a zero-row
        slice when plain arrays were passed).
        """
        if isinstance(X, dict):
            Xtest = X['test']
            Ytest = Y['test']
            X = X['train']
            Y = Y['train']
        else:
            # A zero-row slice keeps the column dimension but holds no
            # examples; X[0, :] would instead select the first training row.
            Xtest = X[0:0, :]
            Ytest = Y[0:0, :]

        # Record whether inputs and targets are the same object *before*
        # shuffling rebinds X; afterwards the identity test can never hold.
        tied = X is Y
        if shuffle:
            perm = random.permutation(X.shape[0])
            X = take(X, perm, axis=0)
            Y = X if tied else take(Y, perm, axis=0)
        t0 = now()
        self._X = bm.asarray(X)
        self._Y = self._X if tied else bm.asarray(Y)
        self._Xtest = bm.asarray(Xtest)
        self._Ytest = self._Xtest if tied else bm.asarray(Ytest)
        if bm.backend_name == "gnumpy":
            # Single-argument call form prints identically on Python 2 and 3.
            print("Host->Device transfer of dataset took %.3fs" % (now() - t0))
        self._size = X.shape[0]
        self._Xrescale = (1., 0.)  #scale,bias
        self.max_batchsize = max_batchsize
        self.Xshape = Xshape or (1, X.shape[1])
        self.Yshape = Yshape or (1, Y.shape[1])
        self.Xdim = X.shape[1]
        self.Ydim = Y.shape[1]
        self.Xrange = Xrange or (X.min(axis=0), X.max(axis=0))
        self.Yrange = Yrange or (Y.min(axis=0), Y.max(axis=0))
        # Non-scalar ranges are converted to backend arrays with one row.
        if not isscalar(self.Xrange[0]):
            self.Xrange = (bm.asarray(self.Xrange[0]).reshape(
                (1, -1)), bm.asarray(self.Xrange[1]).reshape((1, -1)))
        if not isscalar(self.Yrange[0]):
            self.Yrange = (bm.asarray(self.Yrange[0]).reshape(
                (1, -1)), bm.asarray(self.Yrange[1]).reshape((1, -1)))
        rs = self._rowslice(0, self._size)
        self.train = DataFold(self._X[rs, :], self._Y[rs, :])
        self.valid = DataFold(self._X[0:0, :], self._Y[0:0, :])
        self.test = DataFold(self._Xtest, self._Ytest)
Пример #7
0
 def log(self,event,stats):
     '''
     Refresh the cached feature array from the model's current weights.

     event -- name of the logging event; "epoch" events increment
              self._update_count and may recompute the feature ordering.
     stats -- accepted for interface compatibility; not read here.

     Sets self._feat, self._featrange and self._dirty.  For the first 25
     "epoch" events (with self._sorted set) self._ordering is recomputed.
     '''
     if self._direction == 'input':
         # Filters going into first layer of hidden units
         W,_ = self._model.weights[0]  # the second element is not used here
         W = bm.as_numpy(W).copy().transpose()
     else:
         # Templates going out of final layer of hidden units
         W = self._model.weights[-1].W
         W = bm.as_numpy(W).copy()
     # The leading axis indexes the individual features.
     W = W.reshape(tuple([-1]) + self._featshape)
     self._feat  = W # feat[i] holds the reshaped weights of feature i
     self._featrange = (W.ravel().min(),W.ravel().max())
     self._dirty = True
     if event == 'epoch':
         self._update_count += 1
         if self._sorted and self._update_count <= 25:
             # Sort by decreasing variance
             ranks = [-var(self._feat[j,:,:].ravel()) for j in range(self._feat.shape[0])]
             self._ordering = argsort(ranks)
     # Identity test: once _ordering holds the argsort result, `!= None`
     # compares an array by value and does not yield a usable condition.
     if self._ordering is not None:
         self._feat = self._feat[self._ordering,:,:]
0
 def _collect_stats_on_batch(self, batch):
     """
     Evaluate the model on one batch and gather performance statistics.

     The batch is unpacked into inputs X and targets Y, and the model is run
     on X with hidden activations requested.  The returned dict holds "H"
     (numpy copies of every entry returned by model.eval), "loss",
     "regularizer" and "penalty"; when the trainer's task is
     "classification" it also holds "error rate" as a percentage of
     batch.size.
     """
     X, Y = batch
     model = self.trainer().model
     H = model.eval(X, want_hidden=True)
     stats = {
         "H": [bm.as_numpy(Hi).copy() for Hi in H],  # copies of the hidden activations
         "loss": model.loss(H[-1], Y),  # scalar loss value
         "regularizer": model.regularizer(H),  # scalar hidden unit regularization penalty
         "penalty": model.penalty(),  # scalar weight penalty
     }
     if self.trainer().task() != "classification":
         return stats

     # Count the rows whose arg-max output disagrees with the arg-max target.
     predicted = array(argmax(H[-1], axis=1))
     expected = argmax(Y, axis=1)
     num_wrong = count_nonzero(predicted != expected)
     stats["error rate"] = 100 * num_wrong / float(batch.size)
     return stats
Пример #9
0
    def __init__(self,X,Y,Xshape=None,Yshape=None,Xrange=None,Yrange=None,shuffle=True,max_batchsize=1):
        '''
        Set up the dataset from inputs X and targets Y.

        X and Y are either arrays, or dicts holding 'train' and 'test'
        entries; passing the same object for both marks inputs and targets
        as shared.  Xshape/Yshape default to (1, number of columns) and
        Xrange/Yrange to the per-column (min, max) of the training data.
        When shuffle is true the training rows are permuted into new arrays
        (the caller's arrays are left untouched).

        All training rows go to the "train" fold, "valid" is created from a
        zero-row slice, and "test" holds the 'test' entries (or a zero-row
        slice when plain arrays were passed).
        '''
        if isinstance(X,dict):
            Xtest = X['test']
            Ytest = Y['test']
            X = X['train']
            Y = Y['train']
        else:
            # Zero-row slices: X[0,:] would select the first training row
            # rather than an empty test fold.
            Xtest = X[0:0,:]
            Ytest = Y[0:0,:]

        # Capture the identity of X and Y before shuffling rebinds X;
        # testing `X is Y` afterwards could never succeed.
        tied = X is Y
        if shuffle:
            perm = random.permutation(X.shape[0])
            X = take(X,perm,axis=0)
            Y = X if tied else take(Y,perm,axis=0)
        t0 = now()
        self._X = bm.asarray(X)
        self._Y = self._X if tied else bm.asarray(Y)
        self._Xtest = bm.asarray(Xtest)
        self._Ytest = self._Xtest if tied else bm.asarray(Ytest)
        if bm.backend_name == "gnumpy":
            # Single-argument call form prints identically on Python 2 and 3.
            print("Host->Device transfer of dataset took %.3fs" % (now()-t0))
        self._size  = X.shape[0]
        self._Xrescale = (1.,0.) #scale,bias
        self.max_batchsize = max_batchsize
        self.Xshape = Xshape or (1,X.shape[1])
        self.Yshape = Yshape or (1,Y.shape[1])
        self.Xdim   = X.shape[1]
        self.Ydim   = Y.shape[1]
        self.Xrange = Xrange or (X.min(axis=0),X.max(axis=0))
        self.Yrange = Yrange or (Y.min(axis=0),Y.max(axis=0))
        # Non-scalar ranges are converted to backend arrays with one row.
        if not isscalar(self.Xrange[0]):
            self.Xrange = (bm.asarray(self.Xrange[0]).reshape((1,-1)),bm.asarray(self.Xrange[1]).reshape((1,-1)))
        if not isscalar(self.Yrange[0]):
            self.Yrange = (bm.asarray(self.Yrange[0]).reshape((1,-1)),bm.asarray(self.Yrange[1]).reshape((1,-1)))
        rs = self._rowslice(0,self._size)
        self.train = DataFold(self._X[rs,:],self._Y[rs,:])
        self.valid = DataFold(self._X[0:0,:],self._Y[0:0,:])
        self.test  = DataFold(self._Xtest,self._Ytest)
Пример #10
0
 def __init__(self, master, size, dpi, data):
     """
     Create the figure and choose the examples whose targets will be shown.

     master -- parent widget handed to FigureCanvas.
     size   -- (width, height); each component is divided by dpi to form
               the figure size.
     dpi    -- resolution passed to Figure.
     data   -- dataset indexed by fold name ("test" / "train") that also
               exposes Yshape and Yrange.

     At most 50 rows are sampled from the "test" fold when it is non-empty,
     otherwise from the "train" fold.
     """
     fig_w = size[0] / dpi
     fig_h = size[1] / dpi
     Figure.__init__(
         self,
         figsize=(fig_w, fig_h),
         dpi=dpi,
         facecolor="w",
         edgecolor="b",
         frameon=True,
         linewidth=0,
     )
     FigureCanvas(self, master=master)
     self.master = master
     self._dirty = True

     if data["test"].size > 0:
         self._fold = "test"
     else:
         self._fold = "train"

     # Draw a random subset of row indices from the chosen fold.
     fold_size = data[self._fold].size
     self._indices = rnd.sample(arange(fold_size), minimum(fold_size, 50))

     # Reshape the selected targets so the last axis indexes the examples.
     chosen_rows = data[self._fold].Y[self._indices, :]
     stacked_shape = data.Yshape + tuple([len(self._indices)])
     self._targets = bm.as_numpy(chosen_rows).transpose().reshape(stacked_shape)

     self._outputs = None
     self._outshape = data.Yshape
     self._outrange = data.Yrange
     self.add_subplot(111, axisbg="w")
Пример #11
0
    def rescale(self, Xrange, Yrange):
        '''
        Linearly remap the stored inputs into (Xrange[0], Xrange[1]) and the
        stored targets into (Yrange[0], Yrange[1]), modifying the data in place.

        Inputs are left untouched when Xrange equals the current range or when
        the inputs have dtype uint8; targets are left untouched when Yrange
        equals the current range or when they are the same object as the
        inputs.  self.Xrange and self.Yrange are always updated afterwards.
        '''
        def _scale_factor(current, target):
            # Ratio of the target width to the current width, with the current
            # width clamped to at least 1e-5.
            width = current[1] - current[0]
            if not isscalar(width):
                bm.maximum(width, 1e-5, out=width)
                bm.reciprocal(width, out=width)
                bm.multiply(width, target[1] - target[0], out=width)
                return width
            return (target[1] - target[0]) / maximum(1e-5, width)

        def _remap(values, current, target):
            # Shift to zero, scale, then shift to the lower bound of target.
            factor = _scale_factor(current, target)
            bm.isub(values, current[0])
            bm.imul(values, factor)
            bm.iadd(values, target[0])

        if Xrange != self.Xrange and self._X.dtype != 'uint8':
            _remap(self._X, self.Xrange, Xrange)

        if Yrange != self.Yrange and self._X is not self._Y:
            _remap(self._Y, self.Yrange, Yrange)

        self.Xrange = Xrange
        self.Yrange = Yrange
0
    def __init__(self,trainer,window_size):
        '''
        Build the training-report window and lay out its plots.

        trainer     -- supplies task(), model and data for the plots.
        window_size -- "compact" selects the small plot sizes; any other
                       value selects the large ones.

        Plots are stored in self.plots under the keys "errors", "wstats" and
        "hstats", plus "feat_in" / "feat_out" / "recons_tr" when the matching
        data shape satisfies shape[0] > 1 and shape[1] != 0, and "recons_te"
        when the "test" fold is additionally non-empty.
        '''
        Tk.Frame.__init__(self)
        self._unique_id = 0

        # Give every cell of the 5-row x 3-column grid the same resize
        # weight; each plot occupies one or more cells.
        for row in range(5):
            self.master.rowconfigure(row,weight=1)
        for col in range(3):
            self.master.columnconfigure(col,weight=1)
        dpi = 80.0
        self.plots = {}
        if window_size == "compact":
            col0_wd = 300
            col1_wd = 300
            row0_ht = 200
            row1_ht = 100
        else:
            col0_wd = 900
            col1_wd = 770
            row0_ht = 345
            row1_ht = 470

        fill = Tk.N+Tk.S+Tk.E+Tk.W  # sticky value used for every plot widget

        # Add error plot in top-left cell
        self.plots["errors"] = TrainingReportErrorPlot(self.master,(col0_wd,row0_ht),dpi,trainer.task())
        self.plots["errors"].canvas.get_tk_widget().grid(row=0,column=0,sticky=fill)

        has_input_feat  = trainer.data.Xshape[0] > 1 and trainer.data.Xshape[1]
        has_output_feat = trainer.data.Yshape[0] > 1 and trainer.data.Yshape[1]

        # Input feature grid in top-right cell; it spans two columns when
        # there is no output feature grid next to it.
        if has_input_feat:
            self.plots["feat_in"] = TrainingReportFeatureGrid(self.master,(col1_wd,row0_ht),dpi,trainer.model,trainer.data.Xshape,"input")
            self.plots["feat_in"].canvas.get_tk_widget().grid(row=0,column=1,columnspan=(1 if has_output_feat else 2),sticky=fill)

        # Output feature grid in top-right-right cell
        if has_output_feat:
            self.plots["feat_out"] = TrainingReportFeatureGrid(self.master,(col1_wd,row0_ht),dpi,trainer.model,trainer.data.Yshape,"output")
            self.plots["feat_out"].canvas.get_tk_widget().grid(row=0,column=(2 if has_input_feat else 1),columnspan=(1 if has_input_feat else 2),sticky=fill)

        # *Weight* statistics in bottom-left cell.  The weights are held
        # through a weak reference so the callback does not keep them alive.
        weights_ref = weakref.ref(trainer.model.weights)
        get_weightmats = lambda event,stats: [bm.as_numpy(abs(layer.W)) for layer in weights_ref()]
        #weight_percentiles =  list(100*(1-linspace(0.1,.9,10)**1.5))
        weight_percentiles =  list(100*(1-linspace(0.05,.95,20)))
        self.plots["wstats"] = TrainingReportPercentiles(self.master,(col0_wd,row1_ht),dpi,get_weightmats,weight_percentiles,True,title="W")
        self.plots["wstats"].canvas.get_tk_widget().grid(row=1,column=0,sticky=fill)

        # *Hidden activity* statistics; the widget is placed further below,
        # once it is known whether reconstruction plots are shown.
        get_hidden = lambda event,stats: stats["train"]["H"]
        #hidden_percentiles =  list(100*(1-linspace(0.1,.9,10)**1.5))
        hidden_percentiles =  list(100*(1-linspace(0.05,.95,20)))
        ranges = [layer.f.actual_range() for layer in trainer.model._cfg[1:]]
        self.plots["hstats"] = TrainingReportPercentiles(self.master,(col0_wd,row1_ht),dpi,get_hidden,hidden_percentiles,True,ranges=ranges,title="H")

        # For problems with 2D output, draw the target and the reconstruction side by side
        if has_output_feat:
            has_test = trainer.data["test"].size > 0
            self.plots["recons_tr"] = TrainingReportReconstructGrid(self.master,(col1_wd,row1_ht),dpi,trainer.data,"train")
            if has_test:
                self.plots["recons_tr"].canvas.get_tk_widget().grid(row=1,column=1,rowspan=3,sticky=fill)
                self.plots["recons_te"] = TrainingReportReconstructGrid(self.master,(col1_wd,row1_ht),dpi,trainer.data,"test")
                self.plots["recons_te"].canvas.get_tk_widget().grid(row=1,column=2,rowspan=3,sticky=fill)
            else:
                # No test fold: the training reconstructions take both columns.
                self.plots["recons_tr"].canvas.get_tk_widget().grid(row=1,column=1,columnspan=2,rowspan=3,sticky=fill)

            # `in` replaces dict.has_key(), which no longer exists in Python 3.
            if "hstats" in self.plots:
                self.plots["hstats"].canvas.get_tk_widget().grid(row=2,column=0,sticky=fill)
        else:
            if "hstats" in self.plots:
                self.plots["hstats"].canvas.get_tk_widget().grid(row=1,column=1,sticky=fill)

        # Disabled per-layer activity plots, kept for reference:
        #num_activityplots = min(3,trainer.model.numlayers()-1)
        #for k in range(num_activityplots):
        #    name = 'activity%i' % k
        #    self.plots[name] = TrainingReportActivityPlot(self.master,(col0_wd/dpi,100/dpi),dpi,k,xaxis=(k==num_activityplots-1))
        #    if "recons_tr" in self.plots:
        #        self.plots[name].canvas.get_tk_widget().grid(row=1+k,column=0,columnspan=1,sticky=Tk.N+Tk.S+Tk.W+Tk.E)
        #    else:
        #        self.plots[name].canvas.get_tk_widget().grid(row=1+k,column=0,columnspan=3,sticky=Tk.N+Tk.S+Tk.W+Tk.E)

        #self.master.geometry('2000x900+%d+%d' % (0,100))
        self.master.geometry('600x300+%d+%d' % (0,100))
        self.master.title("Training Report")
        self.update()
        self._redraw_interval = 500
Пример #13
0
    def __init__(self, trainer):
        """
        Build the training-report window and lay out its plots.

        trainer -- supplies task(), model and data for the plots.

        Plots are stored in self.plots under the keys "errors", "wstats" and
        "hstats", plus "feat_in" / "feat_out" / "recons" when the matching
        data shape satisfies shape[0] > 1 and shape[1] != 0.
        """
        Tk.Frame.__init__(self)
        self._unique_id = 0

        # Give every cell of the 3x3 grid the same resize weight; each plot
        # occupies one or more cells.
        for index in range(3):
            self.master.rowconfigure(index, weight=1)
        for index in range(3):
            self.master.columnconfigure(index, weight=1)
        dpi = 80.0
        self.plots = {}
        # Alternative (larger) sizes:
        # col0_wd = 320
        # col1_wd = 870
        # row0_ht = 400
        # row1_ht = 220
        col0_wd = 300
        col1_wd = 300
        row0_ht = 200
        row1_ht = 100

        fill = Tk.N + Tk.S + Tk.E + Tk.W  # sticky value used for every plot widget
        has_output_feat = trainer.data.Yshape[0] > 1 and trainer.data.Yshape[1]

        # Add error plot in top-left cell
        self.plots["errors"] = TrainingReportErrorPlot(self.master, (col0_wd, row0_ht), dpi, trainer.task())
        self.plots["errors"].canvas.get_tk_widget().grid(row=0, column=0, sticky=fill)

        # Input feature grid in top-right cell
        if trainer.data.Xshape[0] > 1 and trainer.data.Xshape[1]:
            self.plots["feat_in"] = TrainingReportFeatureGrid(
                self.master, (col1_wd, row0_ht), dpi, trainer.model, trainer.data.Xshape, "input"
            )
            self.plots["feat_in"].canvas.get_tk_widget().grid(row=0, column=1, sticky=fill)

        # Output feature grid in top-right-right cell
        if has_output_feat:
            self.plots["feat_out"] = TrainingReportFeatureGrid(
                self.master, (col1_wd, row0_ht), dpi, trainer.model, trainer.data.Yshape, "output"
            )
            # `in` replaces dict.has_key(), which no longer exists in Python 3.
            self.plots["feat_out"].canvas.get_tk_widget().grid(
                row=0, column=(2 if "feat_in" in self.plots else 1), sticky=fill
            )

        # *Weight* statistics in bottom-left cell.  The weights are held
        # through a weak reference so the callback does not keep them alive.
        weights_ref = weakref.ref(trainer.model.weights)
        get_weightmats = lambda event, stats: [bm.as_numpy(abs(layer.W)) for layer in weights_ref()]
        weight_percentiles = list(100 * (1 - linspace(0.1, 0.9, 10) ** 1.5))
        self.plots["wstats"] = TrainingReportPercentiles(
            self.master, (col0_wd, row1_ht), dpi, get_weightmats, weight_percentiles, True, title="W"
        )
        self.plots["wstats"].canvas.get_tk_widget().grid(row=1, column=0, sticky=fill)

        # *Hidden activity* statistics in bottom-right cell
        get_hidden = lambda event, stats: stats["train"]["H"]
        hidden_percentiles = list(100 * (1 - linspace(0.1, 0.9, 10) ** 1.5))
        ranges = [layer.f.actual_range() for layer in trainer.model._cfg[1:]]
        self.plots["hstats"] = TrainingReportPercentiles(
            self.master, (col0_wd, row1_ht), dpi, get_hidden, hidden_percentiles, False, ranges=ranges, title="H"
        )
        self.plots["hstats"].canvas.get_tk_widget().grid(row=1, column=1, sticky=fill)

        # For problems with 2D output, draw the target and the reconstruction side by side
        if has_output_feat:
            self.plots["recons"] = TrainingReportReconstructGrid(self.master, (col1_wd, row1_ht), dpi, trainer.data)
            # NOTE(review): this uses the same starting cell (row 1, column 1)
            # as "hstats" above -- confirm that the overlap is intended.
            self.plots["recons"].canvas.get_tk_widget().grid(
                row=1, column=1, rowspan=2, sticky=fill
            )

        self.master.geometry("+%d+%d" % (0, 180))
        self.master.title("Training Report")
        self.update()
        self._redraw_interval = 500
Пример #14
0
    def rescale(self,Xrange,Yrange):
        '''
        Remap the stored inputs X into (Xrange[0],Xrange[1]) and the stored
        targets Y into (Yrange[0],Yrange[1]), modifying the data in place.

        X is skipped when Xrange already equals self.Xrange or when X has
        dtype uint8; Y is skipped when Yrange already equals self.Yrange or
        when Y is the very same object as X.  The new ranges are recorded on
        self in every case.
        '''
        if Xrange != self.Xrange and self._X.dtype != 'uint8':
            # Scale factor = new width / old width, with the old width
            # clamped to at least 1e-5.
            x_factor = self.Xrange[1]-self.Xrange[0]
            if not isscalar(x_factor):
                bm.maximum(x_factor,1e-5,out=x_factor)
                bm.reciprocal(x_factor,out=x_factor)
                bm.multiply(x_factor,Xrange[1]-Xrange[0],out=x_factor)
            else:
                x_factor = (Xrange[1]-Xrange[0]) / maximum(1e-5,x_factor)

            # Shift to zero, scale, then shift to the new lower bound.
            bm.isub(self._X,self.Xrange[0])
            bm.imul(self._X,x_factor)
            bm.iadd(self._X,Xrange[0])

        if Yrange != self.Yrange and self._X is not self._Y:
            y_factor = self.Yrange[1]-self.Yrange[0]
            if not isscalar(y_factor):
                bm.maximum(y_factor,1e-5,out=y_factor)
                bm.reciprocal(y_factor,out=y_factor)
                bm.multiply(y_factor,Yrange[1]-Yrange[0],out=y_factor)
            else:
                y_factor = (Yrange[1]-Yrange[0]) / maximum(1e-5,y_factor)

            bm.isub(self._Y,self.Yrange[0])
            bm.imul(self._Y,y_factor)
            bm.iadd(self._Y,Yrange[0])

        self.Xrange,self.Yrange = Xrange,Yrange