示例#1
0
 def __call__(self, inp, mode):
     """Normalize ``inp`` and apply the learned scale and shift.

     In ``'train'`` and ``'valid'`` modes the statistics are computed
     over axis 0 of ``inp`` itself. In every other mode the stored
     ``self.mean`` and ``self.var`` are used instead.
     """
     eps = cast_x(0.0001)  # keeps the square roots away from zero

     if mode not in ('train', 'valid'):
         # Stored statistics and parameters receive a leading broadcast
         # axis so that they line up with ``inp``.
         stored_mean = self.mean.dimshuffle('x', 0, 1, 2)
         stored_std = T.sqrt(self.var + eps).dimshuffle('x', 0, 1, 2)
         shift = self.beta.dimshuffle('x', 0, 1, 2)
         scale = self.gamma.dimshuffle('x', 0, 1, 2)
         # Algebraically (inp - mean) * gamma / std + beta.
         return inp*scale/stored_std + shift - stored_mean*scale/stored_std

     # Statistics of the current input along axis 0.
     centered = inp - inp.mean(0).dimshuffle('x', 0, 1, 2)
     second_moment = (centered**cast_x(2)).mean(0).dimshuffle('x', 0, 1, 2)
     normalized = centered / T.sqrt(second_moment + eps)
     return normalized * self.gamma + self.beta
示例#2
0
    def learn(self, model_inp, layer_inp, data):
        """Estimate ``self.mean`` and ``self.var`` from a pass over ``data``.

        A compiled function taking ``model_inp`` accumulates, for every
        example in ``data``, the number of rows seen, the sum of
        ``layer_inp`` and the sum of its squares (both along axis 0) into
        ``count``, ``self.mean`` and ``self.var``. The sums are then turned
        into a mean and a variance (``E[x^2] - E[x]^2``).

        NOTE(review): the accumulators start from whatever ``self.mean``
        and ``self.var`` currently hold - presumably zeros; confirm.

        Args:
            model_inp: symbolic input of the compiled function.
            layer_inp: symbolic expression whose statistics are gathered.
            data: iterable of ``(example, label)`` pairs; labels are unused.
        """
        print('    Learning {}'.format(self.name))

        count = shared_x(0., name='count')
        batch_rows = cast_x(layer_inp.shape[0])
        accumulate = th.function(
            inputs=[model_inp],
            updates=[
                (count, count + batch_rows),
                (self.mean, self.mean + layer_inp.sum(0)),
                (self.var, self.var + (layer_inp**2).sum(0)),
            ])
        for example, _ in data:
            accumulate(example)

        # The mean has to be stored first: the variance expression below
        # reads the already-normalized ``self.mean``.
        self.mean.set_value((self.mean/count).eval())
        self.var.set_value((self.var/count - self.mean**2).eval())

        mean_avg = float(self.mean.mean().eval())
        mean_spread = float(self.mean.std().eval())
        print('      - mean:  mean(mean) = {:0.2f};  std(mean) = {:0.2f}'.format(mean_avg, mean_spread))

        var_avg = float(self.var.mean().eval())
        var_spread = float(self.var.std().eval())
        print('      - var:  mean(var) = {:0.2f};  std(var) = {:0.2f}'.format(var_avg, var_spread))
示例#3
0
    def __call__(self, inp, mode=None):
        """Return ``inp`` with noise applied, but only in ``'train'`` mode.

        The behaviour is selected by ``self.corruption_type``:

        * ``None`` (or a ``corruption_level`` of 0): ``inp`` is returned
          unchanged.
        * ``'zeromask'``: ``inp`` is multiplied by a binomial mask drawn
          with keep probability ``1 - corruption_level`` and divided by
          ``1 - corruption_level``.
        * ``'gaussian'``: noise drawn with mean 0 and standard deviation
          ``corruption_level`` is added to ``inp``.

        In any mode other than ``'train'`` a notice is printed and ``inp``
        is returned unchanged.

        Raises:
            ValueError: if ``self.corruption_type`` is not one of the
                values above (only checked in ``'train'`` mode with a
                non-zero corruption level).
        """
        corruption_type = self.corruption_type
        corruption_level = self.corruption_level

        if mode != 'train':
            print('corrupt : mode (= {}) != "train"'.format(mode))
            return inp

        if corruption_level == 0 or corruption_type is None:
            return inp

        if corruption_type == 'zeromask':
            keep_mask = self.rng.binomial(
                size=inp.shape, n=1, p=1.0 - corruption_level,
                dtype=float_x)
            # NOTE(review): a corruption_level of 1 divides by zero here.
            return keep_mask * inp / cast_x(1 - corruption_level)

        if corruption_type == 'gaussian':
            noise = self.rng.normal(
                size=inp.shape, avg=0.0, std=corruption_level,
                dtype=float_x)
            return noise + inp

        raise ValueError(
            'unknown corruption_type: {!r}'.format(corruption_type))
示例#4
0
    def learn(self, model_inp, layer_inp, data):
        """Estimate ``self.dc`` and ``self.std`` from the start of ``data``.

        ``layer_inp`` is averaged over axes 3 and 2 (presumably the spatial
        axes of a 4-D feature map - confirm), both raw and squared, and the
        results are summed along axis 0 into ``self.dc`` and ``self.std``
        while ``count`` tallies the rows seen. At most
        ``self.nb_pretrain_iterations`` examples are processed. The sums are
        then converted to a mean and to ``sqrt(E[x^2] - E[x]^2)``.

        NOTE(review): the accumulators start from whatever ``self.dc`` and
        ``self.std`` currently hold - presumably zeros; confirm.

        Args:
            model_inp: symbolic input of the compiled function.
            layer_inp: symbolic expression whose statistics are gathered.
            data: iterable of ``(example, label)`` pairs; labels are unused.
        """
        print('    Learning {}'.format(self.__class__.__name__))

        count = shared_x(0., name='count')
        first_moment = layer_inp.mean(3).mean(2)
        second_moment = (layer_inp**2).mean(3).mean(2)
        accumulate = th.function(
            inputs=[model_inp],
            updates=[
                (count, count + cast_x(layer_inp.shape[0])),
                (self.dc, self.dc + first_moment.sum(0)),
                (self.std, self.std + second_moment.sum(0)),
            ])

        for step, (example, _) in enumerate(data):
            if step >= self.nb_pretrain_iterations:
                break
            accumulate(example)

        # ``self.dc`` must be finalized first: the standard deviation below
        # reads the already-normalized value.
        self.dc.set_value((self.dc / count).eval())
        self.std.set_value(T.sqrt(self.std / count - self.dc**2).eval())

        dc_avg = float(self.dc.mean().eval())
        dc_spread = float(self.dc.std().eval())
        print('      - dc centering:  mean(dc) = {:0.2f};  std(dc) = {:0.2f}'.
              format(dc_avg, dc_spread))

        std_avg = float(self.std.mean().eval())
        std_spread = float(self.std.std().eval())
        print(
            '      - contrast nrm:  mean(std) = {:0.2f};  std(std) = {:0.2f}'.
            format(std_avg, std_spread))
示例#5
0
    def __graph_output(self, epoch):
        """Refresh and save the learning-statistics figures for ``epoch``.

        After calling ``self.update_learning_stats_fn()``, every parameter
        in ``self.params`` gets its figure (``self.subplots[param]``)
        updated and saved under ``self.output_path``:

        * 1-D parameters: value, difference from the initial value, and the
          current batch update.
        * Parameters with more than one dimension (2-D ones are transposed,
          then everything is flattened to a matrix whose rows are compared):
          row norms, pairwise cosines between rows, angle to the initial
          rows, update norms, angle between rows and their update, and
          angle between this update and the previous one.
        * Parameters of any other rank are skipped.

        Each panel column shows a histogram (top row) and a per-epoch series
        of the 0.5th/50th/99.5th percentiles and standard deviation (bottom
        row). If ``self.debug_nodes`` is non-empty, the outputs of
        ``self.debug_fn()`` are plotted the same way, together with their
        fraction of non-zero entries.

        Args:
            epoch: value used as the x coordinate of the per-epoch series
                and printed in the panel titles.
        """

        def plot_stats(sp, col, data, title, bins=35, log_scale=False,
                       fontsize=10, **extra_points):
            # Histogram `data` in the top row of column `col`, then append
            # its percentile/std summary for this epoch to the bottom row.
            p005, median, p995 = np.percentile(data, [0.5, 50, 99.5])
            sp[0, col].hist(remove=True, x=data, bins=bins)
            if log_scale:
                sp[0, col].set_yscale('log', nonposy='clip')
            sp[0, col].set_title(title, fontsize=fontsize)
            sp[1, col].add_point(p005=(epoch, p005),
                                 median=(epoch, median),
                                 p995=(epoch, p995),
                                 std=(epoch, data.std()),
                                 **extra_points)

        def angle_degrees(a, b):
            # Angle, in degrees, between matching rows of `a` and `b`.
            # NOTE(review): rounding can push the cosine slightly outside
            # [-1, 1] and a zero-norm row divides by zero; both would yield
            # NaN - confirm whether clipping is wanted.
            cos = (a * b).sum(1)
            nrm_a = T.sqrt((a**2).sum(1))
            nrm_b = T.sqrt((b**2).sum(1))
            fac = cast_x(180. / np.pi)
            return (T.arccos(cos / (nrm_a * nrm_b)) * fac).flatten().eval()

        self.update_learning_stats_fn()

        ########  Learning statistics associated with optimized parameters
        for param in self.params:

            last_update = self.last_batch_update[param]
            this_update = self.this_batch_update[param]
            init = self.init[param]
            sp = self.subplots[param]
            # Captured now: `param` is rebound to derived tensors below.
            name = str(param)

            if param.ndim == 1:
                plot_stats(sp, 0, param.get_value(),
                           '{} at epoch {}'.format(param, epoch))
                plot_stats(sp, 1, (param - init).eval(),
                           '{}-initial'.format(param))
                plot_stats(sp, 2, this_update.get_value(),
                           '{} gradient update'.format(param))

            elif param.ndim > 1:

                if param.ndim == 2:
                    param = param.T
                    last_update = last_update.T
                    this_update = this_update.T
                    init = init.T

                # Collapse to a matrix; all statistics below are row-wise.
                param = param.flatten(2)
                last_update = last_update.flatten(2)
                this_update = this_update.flatten(2)
                init = init.flatten(2)

                # Norms
                nrm = T.sqrt((param**2).sum(1))
                data = nrm.eval()
                plot_stats(
                    sp, 0, data,
                    r'$\Vert w_i \Vert \/ i \in [1,{}]$ at epoch {}'.format(
                        len(data), epoch))

                # Orthonormality
                param_nrm = param / nrm[:, None]
                data = T.dot(param_nrm, param_nrm.T).flatten().eval()
                plot_stats(
                    sp, 1, data,
                    r'$ {{ \frac{{ {{w_i}}^\intercal w_j }}{{ \Vert w_i \Vert \Vert w_j \Vert }} }} {{\vert}}_{{(t={})}}  \/ i,j \in [1,{}]  $'
                    .format(epoch, int(sqrt(len(data)))),
                    bins=60, log_scale=True)

                # Rotations with respect to initial state
                data = angle_degrees(param, init)
                plot_stats(
                    sp, 2, data,
                    r'$ \measuredangle ( w^{{(t={})}}_i, w^{{(t=0)}}_i ) \/ i \in [1,{}] $'
                    .format(epoch, len(data)))

                # Update norm
                data = T.sqrt((this_update**2).sum(1)).eval()
                plot_stats(
                    sp, 3, data,
                    r'$\Vert u_i \Vert \/ i \in [1,{}]$ at epoch {}'.format(
                        len(data), epoch))

                # Update rotation with respect to weight vectors
                data = angle_degrees(param, this_update)
                try:
                    plot_stats(
                        sp, 4, data,
                        r'$ \measuredangle ( w^{{(t={})}}_i, u^{{(t={})}}_i ) \/ i \in [1,{}] $'
                        .format(epoch, epoch, len(data)))
                except Exception:
                    # Diagnostic aid kept from the original code: report
                    # which parameter failed, then propagate the error.
                    print(param)
                    print(data.shape)
                    raise

                # Rotation of this update with respect to the last one
                data = angle_degrees(this_update, last_update)
                plot_stats(
                    sp, 5, data,
                    r'$ \measuredangle ( u^{{(t={})}}_i, u^{{(t={})}}_i ) \/ i \in [1,{}] $'
                    .format(epoch, epoch - 1, len(data)))

            else:
                continue

            prefix = {0: 'unsupervised', 1: 'supervised'}[self.supervised]
            filename = '{}_{}_learning_stats_{}.png'.format(
                prefix, self.model_id, name)
            sp.savefig(join(self.output_path, filename), dpi=100)

        ########  Statistics of the monitored debug nodes
        if self.debug_nodes:
            outputs = self.debug_fn()
            for (name, node), data in zip(self.debug_nodes.items(), outputs):
                sp = self.subplots[node]
                data = data.flatten()
                nonzeros = float((data != 0).mean())
                plot_stats(sp, 0, data,
                           '{} at t={}'.format(name, epoch),
                           bins=60, log_scale=True, fontsize=6,
                           nonzero=(epoch, nonzeros))
                sp[1, 0].set_title(
                    'Non-zero = {:0.4f}%'.format(nonzeros * 100),
                    fontsize=8)
                sp.savefig(join(self.output_path, name + '.png'), dpi=100)
示例#6
0
    def learn(self,
              inp,
              trainer,
              inp_corruption_type=None,
              inp_corruption_level=0,
              hid_corruption_type=None,
              hid_corruption_level=0,
              cost_weight=cast_x(1),
              learn_scale_first=False,
              debug_path=None,
              nb_frames=None):
        """Build (when ``trainer`` is given) and run the autoencoder training.

        With a truthy ``trainer``, a corrupted encoder/decoder pair is built
        for the training cost and an uncorrupted pair for the validation
        cost, both through ``self.cost``. The result of calling ``trainer``
        on ``self.params.values()`` is stored in ``self.trainer``. With a
        falsy ``trainer``, the ``self.trainer`` left by an earlier call is
        reused (NOTE(review): this raises ``AttributeError`` if none exists
        - confirm that callers always build one first).

        Args:
            inp: symbolic input of the autoencoder.
            trainer: callable building a learner from
                ``(params, train_cost, valid_cost, ...)``; it also exposes
                ``lookback`` and ``momentum`` attributes.
            inp_corruption_type, inp_corruption_level: corruption applied
                when encoding for the training cost.
            hid_corruption_type, hid_corruption_level: corruption applied
                when decoding for the training cost.
            cost_weight: weight forwarded to ``self.cost``.
            learn_scale_first: if true, first train ``self.scale`` alone
                with a 20x shorter lookback and zero momentum.
            debug_path, nb_frames: forwarded to ``self.debug_call`` through
                the ``debug_calls`` argument of the trainer.
        """
        if trainer:
            # Build noisy autoencoder for training
            train_enc = self(inp, inp_corruption_type, inp_corruption_level,
                             'full')
            train_dec = self.dec(train_enc, hid_corruption_type,
                                 hid_corruption_level)
            train_cost = self.cost(inp, train_dec, cost_weight)

            # Build noiseless autoencoder for validation
            valid_enc = self(inp, border_mode='full')
            valid_dec = self.dec(valid_enc)
            valid_cost = self.cost(inp, valid_dec, cost_weight)

            # Quick training for weight scaling
            if learn_scale_first:
                saved = (trainer.lookback, trainer.momentum)
                try:
                    trainer.lookback = int(ceil(trainer.lookback / 20.))
                    trainer.momentum = 0
                    trainer([self.scale], train_cost, valid_cost,
                            model_id=self.model_id + '_scaling').learn()
                finally:
                    # Restore the caller's settings even if the quick
                    # pre-training fails, so the trainer is never left with
                    # the temporary overrides.
                    trainer.lookback, trainer.momentum = saved

            debug_args = dd()
            debug_args.debug_path = debug_path
            debug_args.nb_frames = nb_frames
            debug_args.prefix = 'unsupervised'
            encoder_node_name = ('unsupervised_' + self.model_id +
                                 '_encoder_act_trainset')
            self.trainer = trainer(
                self.params.values(), train_cost, valid_cost,
                model_id=self.model_id,
                additionnal_updates=self.additionnal_update(),
                debug_calls=(self.debug_call, debug_args),
                debug_nodes=dd({encoder_node_name: train_enc}))

        # Learn model
        self.trainer.learn()
示例#7
0
 def cost(self, inp, dec, weights = cast_x(1)):
     """Return the weighted mean of the halved squared reconstruction error.

     The halved squared difference between ``dec`` and ``inp`` is averaged
     over axes 3, 2 and 1 in turn, multiplied by ``weights``, and averaged
     over what remains.
     """
     half_sq_err = cast_x(0.5)*(dec-inp)**2
     per_example = half_sq_err.mean(3).mean(2).mean(1)
     weighted = per_example*weights
     return weighted.mean()
示例#8
0
def conv_normalize(inp):
    """Divide each axis-0 slice of a 4-D tensor by its L2 norm.

    The norm is taken over axes 1, 2 and 3, and a small epsilon is added to
    it so that an all-zero slice does not divide by zero.

    Args:
        inp: 4-D symbolic tensor.

    Returns:
        A tensor shaped like ``inp`` whose axis-0 slices have (near) unit
        L2 norm.
    """
    norm = T.sqrt((inp**cast_x(2)).sum(3).sum(2).sum(1))
    # The epsilon belongs inside the denominator. Without the parentheses
    # it was added to the quotient, which shifted every output by 1e-5 and
    # gave no protection against a zero norm.
    return inp / (norm.dimshuffle(0, 'x', 'x', 'x') + cast_x(0.00001))